spiders.py 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. # spiders.py
  2. import json
  3. import scrapy
  4. class TitleCreatorSpider(scrapy.Spider):
  5. name = "title_creator"
  6. def __init__(self, urls=None, selected_pt=None, *args, **kwargs):
  7. super(TitleCreatorSpider, self).__init__(*args, **kwargs)
  8. self.start_urls = urls or []
  9. self.selected_pt = selected_pt
  10. self.results = []
  11. def parse(self, response):
  12. # We import here to avoid 'AppRegistryNotReady' errors in Django
  13. from .views import construct_dynamic_title
  14. new_title = ""
  15. status_d = False
  16. if response.status != 200:
  17. new_title = f"Failed (HTTP {response.status})"
  18. status_d = False
  19. else:
  20. print("response",response)
  21. script_tag = response.css('script#__NEXT_DATA__::text').get()
  22. if script_tag:
  23. try:
  24. raw_data = json.loads(script_tag)
  25. new_title = construct_dynamic_title(raw_data, self.selected_pt)
  26. status_d = True
  27. except Exception:
  28. new_title = "Data Parsing Error"
  29. status_d = False
  30. else:
  31. new_title = "Attribute not found (Empty Script)"
  32. status_d = False
  33. # script_tag = response.css('script#__NEXT_DATA__::text').get()
  34. # if script_tag:
  35. # try:
  36. # raw_data = json.loads(script_tag)
  37. # new_title = construct_dynamic_title(raw_data, self.selected_pt)
  38. # except Exception:
  39. # new_title = "Data Parsing Error"
  40. # else:
  41. # new_title = "Could not find __NEXT_DATA__"
  42. self.results.append({
  43. "id": self.selected_pt,
  44. "url": response.url,
  45. "new_title": new_title,
  46. "status": status_d
  47. })