price-tracker/debug_special_pricing.py

#!/usr/bin/env python3
"""
Special Pricing Debug Tool for UK Price Tracker
This tool helps debug and monitor special pricing detection on real websites.
It can be used to test URLs and see exactly what pricing information is being detected.
"""
import sys
import os
import asyncio
import logging
import argparse
from typing import Dict, Any

# Add the src directory to the path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))

from uk_scraper import UKCateringScraper
from config import Config

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


def detect_site_from_url(url: str) -> str:
    """Detect which site the URL belongs to."""
    if 'jjfoodservice.com' in url:
        return 'jjfoodservice'
    elif 'atoz-catering.co.uk' in url:
        return 'atoz_catering'
    elif 'amazon.co.uk' in url:
        return 'amazon_uk'
    else:
        return 'unknown'
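
# For example (illustrative URLs):
#   detect_site_from_url('https://www.jjfoodservice.com/product/...') -> 'jjfoodservice'
#   detect_site_from_url('https://www.amazon.co.uk/dp/...')           -> 'amazon_uk'
#   detect_site_from_url('https://example.com/')                      -> 'unknown'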


async def debug_url_pricing(url: str, verbose: bool = False):
    """Debug pricing extraction for a specific URL."""
    config = Config()
    scraper = UKCateringScraper(config)
    site_name = detect_site_from_url(url)

    print(f"Debugging URL: {url}")
    print(f"Detected site: {site_name}")
    print("-" * 60)

    if site_name == 'unknown':
        print("❌ Unknown site - cannot process")
        return

    try:
        # Fetch the page content
        print("🌐 Fetching page content...")
        html_content = await scraper._fetch_page(url)
        if not html_content:
            print("❌ Failed to fetch page content")
            return
        print("✅ Page content fetched successfully")

        # Parse with BeautifulSoup
        from bs4 import BeautifulSoup
        soup = BeautifulSoup(html_content, 'html.parser')

        # Debug special pricing detection
        print("\n🔍 Looking for special offer prices...")
        special_prices = scraper._find_special_offer_prices(soup, site_name)
        if special_prices:
            print(f"✅ Found {len(special_prices)} special offer prices:")
            for price, selector in special_prices:
                print(f"   £{price} (found with: {selector})")
            best_special_price = min(price for price, _ in special_prices)
            print(f"🎯 Best special offer price: £{best_special_price}")
        else:
            print("❌ No special offer prices found")

        # Test the site-specific extraction method
        print(f"\n🔍 Testing {site_name} extraction method...")
        if site_name == 'jjfoodservice':
            result = scraper._extract_jjfoodservice_data(soup)
        elif site_name == 'atoz_catering':
            result = scraper._extract_atoz_catering_data(soup)
        elif site_name == 'amazon_uk':
            result = scraper._extract_amazon_uk_data(soup)

        print("✅ Extraction result:")
        print(f"   Price: £{result['price']}" if result['price'] else "   Price: Not found")
        print(f"   Title: {result.get('title', 'Not found')}")
        print(f"   Available: {result.get('availability', 'Unknown')}")
        print(f"   Currency: {result.get('currency', 'Unknown')}")

        # If verbose, show more debugging info
        if verbose:
            print(f"\n🔍 Verbose debugging for {site_name}...")
            # Get site selectors from config
            site_config = config.get_site_config(site_name)
            if site_config and 'selectors' in site_config:
                selectors = site_config['selectors']
                # Test each selector type
                for selector_type, selector_list in selectors.items():
                    print(f"\n  Testing {selector_type} selectors:")
                    for selector in selector_list:
                        try:
                            elements = soup.select(selector)
                            if elements:
                                print(f"    {selector} -> Found {len(elements)} elements")
                                for i, elem in enumerate(elements[:3]):  # Show first 3
                                    text = elem.get_text(strip=True)[:100]  # Truncate long text
                                    print(f"      [{i+1}] {text}")
                            else:
                                print(f"    {selector} -> No elements found")
                        except Exception as e:
                            print(f"    ⚠️ {selector} -> Error: {e}")

        # Test the full scraping method
        print("\n🔍 Testing full scrape_product_price method...")
        full_result = await scraper.scrape_product_price(url, site_name)
        print("✅ Full scraping result:")
        print(f"   Success: {full_result['success']}")
        print(f"   Price: £{full_result['price']}" if full_result['price'] else "   Price: Not found")
        print(f"   Error: {full_result.get('error', 'None')}")

    except Exception as e:
        print(f"❌ Error during debugging: {e}")
        if verbose:
            import traceback
            traceback.print_exc()
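
# The coroutine can also be driven programmatically, e.g. from a test
# (hypothetical URL for illustration):
#   asyncio.run(debug_url_pricing('https://www.jjfoodservice.com/product/...', verbose=True))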


def main():
    """Main function to run the debug tool."""
    parser = argparse.ArgumentParser(description='Debug special pricing detection for UK price tracker')
    parser.add_argument('url', help='URL to debug')
    parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output')
    parser.add_argument('--test-selectors', action='store_true',
                        help='Test all selectors from config (same selector sweep as --verbose)')
    args = parser.parse_args()

    print("UK Price Tracker - Special Pricing Debug Tool")
    print("=" * 60)

    # Run the debugging; --test-selectors enables the verbose selector sweep,
    # which was previously parsed but never used.
    asyncio.run(debug_url_pricing(args.url, args.verbose or args.test_selectors))


if __name__ == "__main__":
    main()