| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253 |
- # spiders.py
- import json
- import scrapy
class TitleCreatorSpider(scrapy.Spider):
    """Fetch pages and derive a new title from their ``__NEXT_DATA__`` JSON.

    For every response handled by :meth:`parse`, one dict is appended to
    ``self.results`` with the keys ``"id"``, ``"url"``, ``"new_title"`` and
    ``"status"`` (``True`` only when a title was built successfully).
    """

    name = "title_creator"

    def __init__(self, urls=None, selected_pt=None, *args, **kwargs):
        """Store the crawl targets and the identifier used to build titles.

        Args:
            urls: URLs to crawl. May be ``None``/empty, a single URL string,
                or an iterable of URL strings.
            selected_pt: Value forwarded to ``construct_dynamic_title`` and
                echoed back as ``"id"`` in each result.
            *args: Passed through to ``scrapy.Spider.__init__``.
            **kwargs: Passed through to ``scrapy.Spider.__init__``.
        """
        super().__init__(*args, **kwargs)
        if isinstance(urls, str):
            # A bare string would otherwise be iterated character by
            # character when Scrapy walks ``start_urls``.
            self.start_urls = [urls]
        else:
            self.start_urls = list(urls or [])
        self.selected_pt = selected_pt
        self.results = []

    def parse(self, response):
        """Record the outcome for one response in ``self.results``."""
        new_title, succeeded = self._build_title(response)
        self.results.append({
            "id": self.selected_pt,
            "url": response.url,
            "new_title": new_title,
            "status": succeeded,
        })

    def _build_title(self, response):
        """Return ``(title_or_error_message, success_flag)`` for *response*."""
        # We import here to avoid 'AppRegistryNotReady' errors in Django
        from .views import construct_dynamic_title

        # NOTE(review): with Scrapy's default HttpErrorMiddleware, non-2xx
        # responses are not delivered to callbacks unless allowed through
        # ``handle_httpstatus_list`` / ``HTTPERROR_ALLOW_ALL`` -- confirm the
        # project settings, otherwise this branch is rarely reached.
        if response.status != 200:
            return f"Failed (HTTP {response.status})", False

        self.logger.debug("response %s", response)
        script_text = response.css('script#__NEXT_DATA__::text').get()
        if not script_text:
            return "Attribute not found (Empty Script)", False

        try:
            raw_data = json.loads(script_text)
            return construct_dynamic_title(raw_data, self.selected_pt), True
        except Exception:
            # Best-effort by design: one bad page must not abort the crawl,
            # but keep the traceback so the failure can be diagnosed.
            self.logger.exception(
                "Could not build title from __NEXT_DATA__ for %s", response.url
            )
            return "Data Parsing Error", False
|