Files
price-tracker/Dockerfile
Oli Passey de5a03d543 3pm cron
2025-07-01 14:57:07 +01:00

143 lines
4.4 KiB
Docker

# Use Python 3.11 slim image (revert from 3.12)
FROM python:3.11-slim
# Install cron and other dependencies
RUN apt-get update && apt-get install -y \
cron \
gcc \
curl \
&& rm -rf /var/lib/apt/lists/*
# Set working directory
WORKDIR /app
# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
FLASK_APP=main.py \
FLASK_ENV=production
# Optional: Set default configuration via environment variables
ENV DATABASE_PATH=/app/data/price_tracker.db \
DELAY_BETWEEN_REQUESTS=2 \
MAX_CONCURRENT_REQUESTS=1 \
REQUEST_TIMEOUT=30 \
RETRY_ATTEMPTS=3
# Copy requirements first for better caching
COPY requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Create non-root user for security
RUN useradd --create-home --shell /bin/bash tracker
# Create necessary directories with proper permissions
RUN mkdir -p /app/data /var/log && \
chmod 755 /app/data /var/log && \
chown -R tracker:tracker /app/data /var/log
# Copy application code
COPY . .
# Create the daily scraper script
RUN echo '#!/usr/bin/env python3\n\
import sys\n\
import os\n\
import asyncio\n\
import logging\n\
from datetime import datetime\n\
\n\
# Configure logging\n\
logging.basicConfig(\n\
level=logging.INFO,\n\
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",\n\
handlers=[\n\
logging.FileHandler("/var/log/price_scraper.log"),\n\
logging.StreamHandler()\n\
]\n\
)\n\
\n\
logger = logging.getLogger(__name__)\n\
\n\
async def main():\n\
try:\n\
from src.config import Config\n\
from src.database import DatabaseManager\n\
from src.scraper_manager import ScraperManager\n\
\n\
logger.info("Starting scheduled price scraping")\n\
\n\
config = Config()\n\
if config.has_config_error():\n\
logger.error(f"Configuration error: {config.get_config_error()}")\n\
return\n\
\n\
db_manager = DatabaseManager(config.database_path)\n\
scraper_manager = ScraperManager(config)\n\
\n\
products = db_manager.get_all_products()\n\
if not products:\n\
logger.warning("No products found to scrape")\n\
return\n\
\n\
logger.info(f"Scraping {len(products)} products")\n\
results = await scraper_manager.scrape_all_products(products)\n\
\n\
total = sum(len(sites) for sites in results.values())\n\
successful = sum(1 for sites in results.values() for result in sites.values() if result["success"])\n\
\n\
logger.info(f"Scraping complete: {successful}/{total} successful")\n\
\n\
# Save results to database\n\
for product_id, site_results in results.items():\n\
for site_name, result in site_results.items():\n\
if result["success"]:\n\
db_manager.save_price_history(\n\
product_id=product_id,\n\
site_name=site_name,\n\
price=result["price"],\n\
availability=result.get("availability", True),\n\
timestamp=datetime.now()\n\
)\n\
\n\
except Exception as e:\n\
logger.error(f"Scheduled scraping failed: {str(e)}", exc_info=True)\n\
\n\
if __name__ == "__main__":\n\
asyncio.run(main())\n\
' > /app/daily_scraper.py && chmod +x /app/daily_scraper.py
# Create cron job - runs daily at 8 AM
RUN echo "0 15 * * * cd /app && python daily_scraper.py >> /var/log/cron.log 2>&1" > /etc/cron.d/price-tracker
RUN chmod 0644 /etc/cron.d/price-tracker
RUN crontab /etc/cron.d/price-tracker
# Create startup script that ensures directories exist and have correct permissions
RUN echo '#!/bin/bash\n\
# Ensure data directory exists and has correct permissions\n\
mkdir -p /app/data /var/log\n\
chown -R tracker:tracker /app/data /var/log\n\
chmod 755 /app/data /var/log\n\
\n\
# Start cron in background\n\
cron\n\
\n\
# Switch to non-root user and start web server\n\
exec su tracker -c "python main.py --mode web"\n\
' > /app/start.sh && chmod +x /app/start.sh
# Set ownership for application files
RUN chown -R tracker:tracker /app
# Expose port
EXPOSE 5000
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD curl -f http://localhost:5000/ || exit 1
# Run startup script
CMD ["/app/start.sh"]