Initial Push

This commit is contained in:
Oli Passey
2025-06-27 10:36:26 +01:00
parent cf1023c14a
commit 191184ba5e
31 changed files with 4531 additions and 68 deletions

View File

@@ -0,0 +1,102 @@
"""
Scheduled price scraping script for cron jobs
"""
import sys
import os
import asyncio
import logging
from datetime import datetime
# Add the parent directory to sys.path to import our modules
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from src.config import Config
from src.database import DatabaseManager
from src.scraper_manager import ScraperManager
from src.notification import NotificationManager
# Configure logging
# Root logging setup: every record at INFO or above is written both to a
# log file (path relative to the current working directory) and to the
# console stream.
logging.basicConfig(
    handlers=[
        logging.FileHandler('scheduled_scraping.log'),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger used by everything in this script.
logger = logging.getLogger(__name__)
async def run_scheduled_scraping():
    """Run one scheduled price-scraping pass over every stored product.

    Builds the config, database, scraper and notification components,
    scrapes all products, saves each successful result to the price
    history, and sends notifications for every result whose price is at or
    below the product's target price.

    Returns:
        None. Returns early (after logging a warning) when the database
        holds no products.

    Raises:
        Exception: Any error raised while scraping, saving or notifying is
            logged with its traceback and then re-raised to the caller.
    """
    try:
        logger.info("=== Starting scheduled price scraping ===")

        # Initialize components
        config = Config()
        db_manager = DatabaseManager(config.database_path)
        scraper_manager = ScraperManager(config)
        notification_manager = NotificationManager(config)

        # Get all products
        products = db_manager.get_all_products()
        if not products:
            logger.warning("No products found in database")
            return

        logger.info(f"Found {len(products)} products to scrape")

        # Scrape all products; the result is iterated below as
        # {product_id: {site_name: result_dict}}.
        results = await scraper_manager.scrape_all_products(products)

        # Process results
        total_success = 0
        total_failed = 0
        price_alerts = []

        for product_id, site_results in results.items():
            product = db_manager.get_product(product_id)
            # The lookup may come back empty (the alert check below already
            # allows for that), so resolve a printable name once, with a
            # fallback, instead of indexing `product` unguarded when logging.
            product_name = product['name'] if product else f"product {product_id}"

            for site_name, result in site_results.items():
                # A result without a 'success' key is counted as a failure
                # rather than aborting the whole run with a KeyError.
                if not result.get('success'):
                    total_failed += 1
                    logger.error(
                        f"Failed to scrape {product_name} on {site_name}: "
                        f"{result.get('error', 'Unknown error')}"
                    )
                    continue

                total_success += 1

                # Save to database
                db_manager.save_price_history(
                    product_id=product_id,
                    site_name=site_name,
                    price=result['price'],
                    currency=result.get('currency', 'USD'),
                    availability=result.get('availability', True),
                    timestamp=datetime.now()
                )

                # Check for price alerts: only products that exist and have
                # a truthy target price can trigger one.
                if product and product['target_price'] and result['price'] <= product['target_price']:
                    price_alerts.append({
                        'product': product,
                        'site': site_name,
                        'current_price': result['price'],
                        'target_price': product['target_price']
                    })
                    # NOTE(review): the "$" prefix is hard-coded here even
                    # though the result may carry a different currency.
                    logger.info(f"Price alert: {product['name']} on {site_name} - ${result['price']:.2f}")

        # Send notifications for price alerts
        if price_alerts:
            await notification_manager.send_price_alerts(price_alerts)
            logger.info(f"Sent notifications for {len(price_alerts)} price alerts")

        logger.info(f"Scraping completed: {total_success} successful, {total_failed} failed")
        logger.info(f"Found {len(price_alerts)} price alerts")

    except Exception as e:
        # Top-level boundary for the cron job: record the traceback in the
        # log, then re-raise so the failure still propagates to the caller.
        logger.error(f"Error during scheduled scraping: {e}", exc_info=True)
        raise
# Script entry point (e.g. when invoked from cron): run the scraping
# coroutine to completion. An exception re-raised by the coroutine
# propagates out of asyncio.run().
if __name__ == "__main__":
    asyncio.run(run_scheduled_scraping())