Oli Passey
2025-07-07 21:13:56 +01:00
parent 4702ff0c22
commit 9593edbce9
2 changed files with 244 additions and 169 deletions

View File

@@ -1,9 +1,8 @@
# Use Python 3.11 slim image (revert from 3.12)
FROM python:3.11-slim

-# Install cron and other dependencies
+# Install system dependencies (remove cron)
RUN apt-get update && apt-get install -y \
-    cron \
    gcc \
    curl \
    && rm -rf /var/lib/apt/lists/*
@@ -22,7 +21,8 @@ ENV DATABASE_PATH=/app/data/price_tracker.db \
    DELAY_BETWEEN_REQUESTS=2 \
    MAX_CONCURRENT_REQUESTS=1 \
    REQUEST_TIMEOUT=30 \
-   RETRY_ATTEMPTS=3
+   RETRY_ATTEMPTS=3 \
+   WEBHOOK_SECRET=your-secret-key-here

# Copy requirements first for better caching
COPY requirements.txt .
@@ -41,178 +41,13 @@ RUN mkdir -p /app/data /var/log && \
# Copy application code
COPY . .

-# Create the daily scraper script
-RUN echo '#!/usr/bin/env python3\n\
import sys\n\
import os\n\
import asyncio\n\
import logging\n\
from datetime import datetime\n\
\n\
# Configure logging\n\
logging.basicConfig(\n\
level=logging.INFO,\n\
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",\n\
handlers=[\n\
logging.FileHandler("/var/log/price_scraper.log"),\n\
logging.StreamHandler()\n\
]\n\
)\n\
\n\
logger = logging.getLogger(__name__)\n\
\n\
async def main():\n\
try:\n\
from src.config import Config\n\
from src.database import DatabaseManager\n\
from src.scraper_manager import ScraperManager\n\
from src.notification import NotificationManager\n\
from src.shopping_list import AutoShoppingListGenerator\n\
\n\
logger.info("Starting scheduled price scraping")\n\
\n\
config = Config()\n\
if config.has_config_error():\n\
logger.error(f"Configuration error: {config.get_config_error()}")\n\
return\n\
\n\
db_manager = DatabaseManager(config.database_path)\n\
scraper_manager = ScraperManager(config)\n\
notification_manager = NotificationManager(config)\n\
shopping_list_generator = AutoShoppingListGenerator(db_manager)\n\
\n\
products = db_manager.get_all_products()\n\
if not products:\n\
logger.warning("No products found to scrape")\n\
return\n\
\n\
logger.info(f"Scraping {len(products)} products")\n\
results = await scraper_manager.scrape_all_products(products)\n\
\n\
total = sum(len(sites) for sites in results.values())\n\
successful = sum(1 for sites in results.values() for result in sites.values() if result["success"])\n\
failed = total - successful\n\
\n\
logger.info(f"Scraping complete: {successful}/{total} successful")\n\
\n\
# Save results and collect price alerts\n\
price_alerts = []\n\
for product_id, site_results in results.items():\n\
product = db_manager.get_product(product_id)\n\
for site_name, result in site_results.items():\n\
if result["success"]:\n\
# Save to database\n\
db_manager.save_price_history(\n\
product_id=product_id,\n\
site_name=site_name,\n\
price=result["price"],\n\
availability=result.get("availability", True),\n\
timestamp=datetime.now()\n\
)\n\
\n\
# Check for price alerts\n\
if product and product.get("target_price") and result["price"] <= product["target_price"]:\n\
price_alerts.append({\n\
"product": product,\n\
"site": site_name,\n\
"current_price": result["price"],\n\
"target_price": product["target_price"],\n\
"url": result.get("url", "")\n\
})\n\
\n\
# Send price alerts if any\n\
if price_alerts:\n\
alert_message = "Price Alerts:\\n\\n"\n\
for alert in price_alerts:\n\
alert_message += f"🎯 {alert[\"product\"][\"name\"]}\\n"\n\
alert_message += f" Store: {alert[\"site\"]}\\n"\n\
alert_message += f" Price: £{alert[\"current_price\"]} (Target: £{alert[\"target_price\"]})\\n"\n\
alert_message += f" URL: {alert[\"url\"]}\\n\\n"\n\
\n\
await notification_manager.send_notification(\n\
subject=f"Price Alert: {len(price_alerts)} item(s) on sale!",\n\
message=alert_message\n\
)\n\
logger.info(f"Sent price alerts for {len(price_alerts)} items")\n\
\n\
# Generate and send daily shopping list\n\
try:\n\
shopping_lists = shopping_list_generator.generate_all_shopping_lists()\n\
if shopping_lists:\n\
shopping_message = "Daily Shopping List (Best Prices):\\n\\n"\n\
total_savings = 0\n\
\n\
for store_name, store_list in shopping_lists.items():\n\
if store_list.items:\n\
shopping_message += f"🏪 {store_name.upper()}:\\n"\n\
store_total = 0\n\
for item in store_list.items:\n\
shopping_message += f" • {item.product_name} - £{item.current_price}\\n"\n\
store_total += item.current_price\n\
if item.savings_amount > 0:\n\
total_savings += item.savings_amount\n\
shopping_message += f" Subtotal: £{store_total:.2f}\\n\\n"\n\
\n\
if total_savings > 0:\n\
shopping_message += f"💰 Total Savings: £{total_savings:.2f}\\n"\n\
\n\
await notification_manager.send_notification(\n\
subject="Daily Shopping List - Best Prices",\n\
message=shopping_message\n\
)\n\
logger.info("Sent daily shopping list")\n\
except Exception as e:\n\
logger.error(f"Failed to generate shopping list: {e}")\n\
\n\
# Send scraping summary\n\
summary_message = f"Daily Price Scraping Summary:\\n\\n"\n\
summary_message += f"📊 Products scraped: {len(products)}\\n"\n\
summary_message += f"✅ Successful: {successful}\\n"\n\
summary_message += f"❌ Failed: {failed}\\n"\n\
summary_message += f"🎯 Price alerts: {len(price_alerts)}\\n"\n\
summary_message += f"🕐 Completed at: {datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")}\\n"\n\
\n\
await notification_manager.send_notification(\n\
subject="Daily Price Scraping Complete",\n\
message=summary_message\n\
)\n\
logger.info("Sent scraping summary")\n\
\n\
except Exception as e:\n\
logger.error(f"Scheduled scraping failed: {str(e)}", exc_info=True)\n\
\n\
# Send error notification\n\
try:\n\
from src.config import Config\n\
from src.notification import NotificationManager\n\
config = Config()\n\
notification_manager = NotificationManager(config)\n\
await notification_manager.send_notification(\n\
subject="Price Scraping Failed",\n\
message=f"Daily price scraping failed with error:\\n\\n{str(e)}"\n\
)\n\
except:\n\
pass # If notification also fails, just log\n\
\n\
if __name__ == "__main__":\n\
asyncio.run(main())\n\
' > /app/daily_scraper.py && chmod +x /app/daily_scraper.py
-# Create cron job - runs daily at 8 AM
-RUN echo "0 15 * * * cd /app && python daily_scraper.py >> /var/log/cron.log 2>&1" > /etc/cron.d/price-tracker
-RUN chmod 0644 /etc/cron.d/price-tracker
-RUN crontab /etc/cron.d/price-tracker
-# Create startup script that ensures directories exist and have correct permissions
+# Create startup script without cron
RUN echo '#!/bin/bash\n\
# Ensure data directory exists and has correct permissions\n\
mkdir -p /app/data /var/log\n\
chown -R tracker:tracker /app/data /var/log\n\
chmod 755 /app/data /var/log\n\
\n\
-# Start cron in background\n\
-cron\n\
-\n\
# Switch to non-root user and start web server\n\
exec su tracker -c "python main.py --mode web"\n\
' > /app/start.sh && chmod +x /app/start.sh
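With cron and the embedded daily_scraper.py gone from the image, the daily run has to be triggered from outside the container, via the webhook endpoints added in the second file below. A minimal sketch of such an external trigger, assuming the web server is reachable at http://localhost:5000 and that WEBHOOK_SECRET matches the value configured for the container (both are placeholders here):

# Hypothetical external replacement for the removed in-container cron job:
# call /webhook/scrape-and-list from a host cron entry or any other scheduler.
import json
import os
import urllib.request

BASE_URL = os.environ.get("TRACKER_URL", "http://localhost:5000")   # placeholder
SECRET = os.environ.get("WEBHOOK_SECRET", "your-secret-key-here")

req = urllib.request.Request(
    f"{BASE_URL}/webhook/scrape-and-list",
    method="POST",
    headers={"X-Webhook-Secret": SECRET},  # header checked by the auth decorator below
)
with urllib.request.urlopen(req, timeout=600) as resp:
    print(json.dumps(json.load(resp), indent=2))

Scheduling this script from the host (for example, a daily crontab entry) keeps the container single-purpose, which appears to be the point of dropping cron from the image.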

View File

@@ -13,6 +13,10 @@ import plotly
import plotly.graph_objs as go
import pandas as pd
import os
+import hmac
+import hashlib
+from functools import wraps
+from flask import request, jsonify

from .database import DatabaseManager
from .config import Config
@@ -474,4 +478,240 @@ def create_app():
        except Exception as e:
            return jsonify({'error': str(e)}), 500
    # Add webhook authentication decorator
    def webhook_auth_required(f):
        @wraps(f)
        def decorated_function(*args, **kwargs):
            # Get webhook secret from environment
            webhook_secret = os.environ.get('WEBHOOK_SECRET', 'your-secret-key-here')
            # Check for secret in header or query parameter
            provided_secret = request.headers.get('X-Webhook-Secret') or request.args.get('secret')
            if not provided_secret or provided_secret != webhook_secret:
                return jsonify({'error': 'Unauthorized'}), 401
            return f(*args, **kwargs)
        return decorated_function
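The decorator compares the shared secret with plain equality (!=), even though hmac and hashlib are now imported at the top of the file. If a timing-safe comparison is wanted, the check could be swapped for something like the sketch below; this is one possible hardening, not what the commit itself does.

# Sketch only: timing-safe variant of the secret check used above.
import hmac
import os

def secret_matches(provided_secret):
    webhook_secret = os.environ.get('WEBHOOK_SECRET', 'your-secret-key-here')
    # compare_digest avoids leaking how many leading characters matched
    return bool(provided_secret) and hmac.compare_digest(provided_secret, webhook_secret)

The rest of the decorator (reading X-Webhook-Secret from the header or secret from the query string) would stay as written.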
    # Add webhook routes
    @app.route('/webhook/scrape', methods=['POST', 'GET'])
    @webhook_auth_required
    def webhook_scrape():
        """Webhook endpoint to trigger price scraping"""
        try:
            # Run scraping in background
            import asyncio
            from .scraper_manager import ScraperManager
            from .notification import NotificationManager

            async def run_scrape():
                try:
                    logger.info("Webhook triggered price scraping")

                    config = Config()
                    if config.has_config_error():
                        logger.error(f"Configuration error: {config.get_config_error()}")
                        return {'error': 'Configuration error'}

                    db_manager = DatabaseManager(config.database_path)
                    scraper_manager = ScraperManager(config)
                    notification_manager = NotificationManager(config)

                    products = db_manager.get_all_products()
                    if not products:
                        logger.warning("No products found to scrape")
                        return {'message': 'No products to scrape'}

                    logger.info(f"Scraping {len(products)} products")
                    results = await scraper_manager.scrape_all_products(products)

                    total = sum(len(sites) for sites in results.values())
                    successful = sum(1 for sites in results.values() for result in sites.values() if result['success'])
                    failed = total - successful

                    logger.info(f"Scraping complete: {successful}/{total} successful")

                    # Save results and collect price alerts
                    price_alerts = []
                    for product_id, site_results in results.items():
                        product = db_manager.get_product(product_id)
                        for site_name, result in site_results.items():
                            if result['success']:
                                # Save to database
                                db_manager.save_price_history(
                                    product_id=product_id,
                                    site_name=site_name,
                                    price=result['price'],
                                    availability=result.get('availability', True),
                                    timestamp=datetime.now()
                                )

                                # Check for price alerts
                                if product and product.get('target_price') and result['price'] <= product['target_price']:
                                    price_alerts.append({
                                        'product': product,
                                        'site': site_name,
                                        'current_price': result['price'],
                                        'target_price': product['target_price'],
                                        'url': result.get('url', '')
                                    })

                    # Send price alerts if any
                    if price_alerts:
                        alert_message = "Price Alerts:\n\n"
                        for alert in price_alerts:
                            alert_message += f"🎯 {alert['product']['name']}\n"
                            alert_message += f" Store: {alert['site']}\n"
                            alert_message += f" Price: £{alert['current_price']} (Target: £{alert['target_price']})\n"
                            alert_message += f" URL: {alert['url']}\n\n"

                        await notification_manager.send_notification(
                            subject=f"Price Alert: {len(price_alerts)} item(s) on sale!",
                            message=alert_message
                        )
                        logger.info(f"Sent price alerts for {len(price_alerts)} items")

                    # Send scraping summary
                    summary_message = f"Daily Price Scraping Summary:\n\n"
                    summary_message += f"📊 Products scraped: {len(products)}\n"
                    summary_message += f"✅ Successful: {successful}\n"
                    summary_message += f"❌ Failed: {failed}\n"
                    summary_message += f"🎯 Price alerts: {len(price_alerts)}\n"
                    summary_message += f"🕐 Completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"

                    await notification_manager.send_notification(
                        subject="Daily Price Scraping Complete",
                        message=summary_message
                    )
                    logger.info("Sent scraping summary")

                    return {
                        'message': 'Scraping completed successfully',
                        'total_products': len(products),
                        'successful': successful,
                        'failed': failed,
                        'price_alerts': len(price_alerts)
                    }

                except Exception as e:
                    logger.error(f"Webhook scraping failed: {str(e)}", exc_info=True)

                    # Send error notification
                    try:
                        await notification_manager.send_notification(
                            subject="Price Scraping Failed",
                            message=f"Daily price scraping failed with error:\n\n{str(e)}"
                        )
                    except:
                        pass

                    return {'error': str(e)}

            # Run the async function
            result = asyncio.run(run_scrape())
            return jsonify(result)

        except Exception as e:
            logger.error(f"Webhook error: {str(e)}", exc_info=True)
            return jsonify({'error': str(e)}), 500

    @app.route('/webhook/shopping-list', methods=['POST', 'GET'])
    @webhook_auth_required
    def webhook_shopping_list():
        """Webhook endpoint to send daily shopping list"""
        try:
            from .shopping_list import AutoShoppingListGenerator
            from .notification import NotificationManager

            config = Config()
            if config.has_config_error():
                return jsonify({'error': 'Configuration error'}), 500

            db_manager = DatabaseManager(config.database_path)
            shopping_list_generator = AutoShoppingListGenerator(db_manager)
            notification_manager = NotificationManager(config)

            # Generate shopping lists
            shopping_lists = shopping_list_generator.generate_all_shopping_lists()

            if shopping_lists:
                shopping_message = "Daily Shopping List (Best Prices):\n\n"
                total_savings = 0

                for store_name, store_list in shopping_lists.items():
                    if store_list.items:
                        shopping_message += f"🏪 {store_name.upper()}:\n"
                        store_total = 0
                        for item in store_list.items:
                            shopping_message += f"{item.product_name} - £{item.current_price}\n"
                            store_total += item.current_price
                            if item.savings_amount > 0:
                                total_savings += item.savings_amount
                        shopping_message += f" Subtotal: £{store_total:.2f}\n\n"

                if total_savings > 0:
                    shopping_message += f"💰 Total Savings: £{total_savings:.2f}\n"

                # Send email using asyncio
                async def send_email():
                    await notification_manager.send_notification(
                        subject="Daily Shopping List - Best Prices",
                        message=shopping_message
                    )

                asyncio.run(send_email())

                return jsonify({
                    'message': 'Shopping list sent successfully',
                    'stores': list(shopping_lists.keys()),
                    'total_savings': total_savings
                })
            else:
                return jsonify({'message': 'No shopping lists generated'})

        except Exception as e:
            logger.error(f"Shopping list webhook error: {str(e)}", exc_info=True)
            return jsonify({'error': str(e)}), 500

    @app.route('/webhook/scrape-and-list', methods=['POST', 'GET'])
    @webhook_auth_required
    def webhook_scrape_and_list():
        """Webhook endpoint to scrape prices AND send shopping list"""
        try:
            # First trigger scraping
            scrape_response = webhook_scrape()
            scrape_data = scrape_response.get_json()

            if 'error' in scrape_data:
                return jsonify({'scraping_error': scrape_data['error']}), 500

            # Then send shopping list
            list_response = webhook_shopping_list()
            list_data = list_response.get_json()

            return jsonify({
                'message': 'Scraping and shopping list completed',
                'scraping': scrape_data,
                'shopping_list': list_data
            })

        except Exception as e:
            logger.error(f"Combined webhook error: {str(e)}", exc_info=True)
            return jsonify({'error': str(e)}), 500

    @app.route('/webhook/health', methods=['GET'])
    def webhook_health():
        """Health check endpoint for webhooks"""
        return jsonify({
            'status': 'healthy',
            'timestamp': datetime.now().isoformat(),
            'endpoints': [
                '/webhook/scrape',
                '/webhook/shopping-list',
                '/webhook/scrape-and-list'
            ]
        })
    return app
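Because /webhook/health is the only route without the auth decorator, it gives a cheap way to confirm the service is up before pointing an external scheduler at the authenticated endpoints. A minimal probe, using the same placeholder base URL as in the earlier sketch:

# Unauthenticated smoke test for the webhook surface.
import json
import urllib.request

with urllib.request.urlopen("http://localhost:5000/webhook/health", timeout=10) as resp:
    health = json.load(resp)

print(health["status"])     # expected: "healthy"
print(health["endpoints"])  # the three authenticated webhook routes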