scraper fix
This commit is contained in:
82
config.json
82
config.json
@@ -7,6 +7,15 @@
|
|||||||
"max_concurrent_requests": 1,
|
"max_concurrent_requests": 1,
|
||||||
"timeout": 30,
|
"timeout": 30,
|
||||||
"retry_attempts": 3,
|
"retry_attempts": 3,
|
||||||
|
"special_pricing": {
|
||||||
|
"enabled": true,
|
||||||
|
"prefer_delivery_prices": true,
|
||||||
|
"detect_strikethrough": true,
|
||||||
|
"detect_was_now_patterns": true,
|
||||||
|
"detect_percentage_discounts": true,
|
||||||
|
"min_discount_threshold": 0.05,
|
||||||
|
"max_price_difference_ratio": 0.5
|
||||||
|
},
|
||||||
"user_agents": [
|
"user_agents": [
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
||||||
@@ -33,25 +42,25 @@
|
|||||||
"base_url": "https://www.jjfoodservice.com",
|
"base_url": "https://www.jjfoodservice.com",
|
||||||
"selectors": {
|
"selectors": {
|
||||||
"price": [
|
"price": [
|
||||||
".price",
|
".price-delivery",
|
||||||
".product-price",
|
".delivery-price",
|
||||||
"[data-testid='price']",
|
".price"
|
||||||
".price-value",
|
],
|
||||||
".current-price",
|
"delivery_price": [
|
||||||
".product-card-price"
|
".price-delivery",
|
||||||
|
".delivery-price"
|
||||||
|
],
|
||||||
|
"special_offer": [
|
||||||
|
".special-offer",
|
||||||
|
".sale-price",
|
||||||
|
".offer-price"
|
||||||
],
|
],
|
||||||
"title": [
|
"title": [
|
||||||
"h1",
|
"h1"
|
||||||
".product-title",
|
|
||||||
".product-name",
|
|
||||||
"[data-testid='product-title']",
|
|
||||||
".product-card-title"
|
|
||||||
],
|
],
|
||||||
"availability": [
|
"availability": [
|
||||||
".stock-status",
|
".stock-status",
|
||||||
".availability",
|
".availability"
|
||||||
"[data-testid='availability']",
|
|
||||||
".product-availability"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -60,26 +69,25 @@
|
|||||||
"base_url": "https://www.atoz-catering.co.uk",
|
"base_url": "https://www.atoz-catering.co.uk",
|
||||||
"selectors": {
|
"selectors": {
|
||||||
"price": [
|
"price": [
|
||||||
".price",
|
".my-price.price-offer",
|
||||||
".product-price",
|
|
||||||
".delivery-price",
|
".delivery-price",
|
||||||
".collection-price",
|
".price"
|
||||||
"span:contains('£')",
|
],
|
||||||
".price-value"
|
"delivery_price": [
|
||||||
|
".delivery-price",
|
||||||
|
".price-delivery"
|
||||||
|
],
|
||||||
|
"special_offer": [
|
||||||
|
".my-price.price-offer",
|
||||||
|
".special-offer",
|
||||||
|
".sale-price"
|
||||||
],
|
],
|
||||||
"title": [
|
"title": [
|
||||||
"h1",
|
"h1"
|
||||||
".product-title",
|
|
||||||
".product-name",
|
|
||||||
"a[href*='/products/product/']",
|
|
||||||
".product-link"
|
|
||||||
],
|
],
|
||||||
"availability": [
|
"availability": [
|
||||||
".stock-status",
|
".stock-status",
|
||||||
".availability",
|
".availability"
|
||||||
".add-to-basket",
|
|
||||||
"button:contains('Add To Basket')",
|
|
||||||
".out-of-stock"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -90,22 +98,18 @@
|
|||||||
"price": [
|
"price": [
|
||||||
".a-price-whole",
|
".a-price-whole",
|
||||||
".a-price .a-offscreen",
|
".a-price .a-offscreen",
|
||||||
|
"#priceblock_ourprice"
|
||||||
|
],
|
||||||
|
"special_offer": [
|
||||||
"#priceblock_dealprice",
|
"#priceblock_dealprice",
|
||||||
"#priceblock_ourprice",
|
".a-price-strike .a-offscreen",
|
||||||
".a-price-range",
|
".a-price-was"
|
||||||
".a-price.a-text-price.a-size-medium.apexPriceToPay",
|
|
||||||
".a-price-current"
|
|
||||||
],
|
],
|
||||||
"title": [
|
"title": [
|
||||||
"#productTitle",
|
"#productTitle"
|
||||||
".product-title",
|
|
||||||
"h1.a-size-large"
|
|
||||||
],
|
],
|
||||||
"availability": [
|
"availability": [
|
||||||
"#availability span",
|
"#availability span"
|
||||||
".a-size-medium.a-color-success",
|
|
||||||
".a-size-medium.a-color-state",
|
|
||||||
"#availability .a-declarative"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user