# spiders.py
import json

import scrapy


class TitleCreatorSpider(scrapy.Spider):
    """Build a title for each crawled page from its ``__NEXT_DATA__`` script.

    For every response handed to :meth:`parse`, one dict is appended to
    ``self.results`` with the keys ``id``, ``url``, ``new_title`` and
    ``status``. ``status`` is ``True`` only when the script JSON was parsed
    and ``construct_dynamic_title`` returned without raising; otherwise
    ``new_title`` holds a short failure description.
    """

    name = "title_creator"

    def __init__(self, urls=None, selected_pt=None, *args, **kwargs):
        """Store the crawl targets and the value forwarded to the title builder.

        Args:
            urls: Iterable of URLs to crawl, or a single URL string. Falsy
                values result in an empty ``start_urls``.
            selected_pt: Passed unchanged to ``construct_dynamic_title`` and
                recorded as ``id`` in every result entry.
            *args: Forwarded to ``scrapy.Spider.__init__``.
            **kwargs: Forwarded to ``scrapy.Spider.__init__``.
        """
        super().__init__(*args, **kwargs)
        if not urls:
            self.start_urls = []
        elif isinstance(urls, str):
            # A bare string would otherwise be iterated character by
            # character when the start requests are generated.
            self.start_urls = [urls]
        else:
            self.start_urls = urls
        self.selected_pt = selected_pt
        self.results = []

    def parse(self, response):
        """Derive a title from ``response`` and record the outcome.

        Nothing is yielded; the outcome is appended to ``self.results``.

        Args:
            response: The Scrapy response for one of ``start_urls``.
        """
        # We import here to avoid 'AppRegistryNotReady' errors in Django
        from .views import construct_dynamic_title

        status_d = False

        # NOTE(review): Scrapy's HttpErrorMiddleware drops non-2xx responses
        # by default, so this branch is presumably only reached when the
        # project sets HTTPERROR_ALLOW_ALL / handle_httpstatus_list - confirm.
        if response.status != 200:
            new_title = f"Failed (HTTP {response.status})"
        else:
            self.logger.debug("response %s", response)
            script_tag = response.css('script#__NEXT_DATA__::text').get()
            if not script_tag:
                new_title = "Attribute not found (Empty Script)"
            else:
                try:
                    raw_data = json.loads(script_tag)
                    new_title = construct_dynamic_title(raw_data, self.selected_pt)
                    status_d = True
                except Exception:
                    # Kept broad on purpose: one bad page must not abort the
                    # crawl, but the traceback is logged instead of discarded.
                    self.logger.exception(
                        "Could not build title for %s", response.url
                    )
                    new_title = "Data Parsing Error"
                    status_d = False

        self.results.append({
            "id": self.selected_pt,
            "url": response.url,
            "new_title": new_title,
            "status": status_d,
        })