# views.py
  1. import os
  2. import json
  3. import time
  4. import requests
  5. import uuid
  6. import threading
  7. import pandas as pd
  8. from bs4 import BeautifulSoup
  9. from django.shortcuts import get_object_or_404, redirect, render
  10. from django.core.files.storage import FileSystemStorage
  11. from django.http import JsonResponse
  12. from .models import TitleMapping, AttributeMaster,ProcessingTask # <--- THIS FIXES THE ERROR
  13. from django.conf import settings
  14. import cloudscraper
  15. from django.contrib import messages
  16. from django.contrib.auth import authenticate, login, logout
  17. # from django.contrib.auth.decorators import login_required
  18. from .decorators import login_required
  19. from django.contrib.auth.hashers import make_password
  20. # To login
  21. def login_view(request):
  22. if request.method == "POST":
  23. email = request.POST.get("username")
  24. password = request.POST.get("password")
  25. print("Email: ", email)
  26. print("Password: ", password)
  27. # Authenticate the user
  28. user = authenticate(request, username=email, password=password)
  29. print("user",user)
  30. if user is not None:
  31. print("User authenticated successfully.")
  32. login(request, user)
  33. request.session['user_email'] = user.email
  34. # request.session = user
  35. # request.session['full_name'] = f"{user.firstName} {user.lastName or ''}".strip()
  36. # # Store both human-readable role and code
  37. # request.session['role'] = user.get_role_display() # 'Super Admin', 'Admin', 'RTA'
  38. # request.session['role_code'] = user.role # '0', '1', '2'
  39. # request.session['joining_date'] = user.createdDate.strftime("%b, %Y")
  40. # request.session['userId'] = user.userId
  41. # 📌 Store client_id if user has a client associated
  42. # request.session['client_id'] = user.client.clientId if user.client else None
  43. return redirect('title_creator_home')
  44. else:
  45. print("Invalid credentials.")
  46. messages.error(request, "Invalid email or password.")
  47. return redirect('login')
  48. print("Rendering login page.")
  49. return render(request, 'login.html')
  50. # To logout
  51. @login_required
  52. def logout_view(request):
  53. logout(request)
  54. messages.success(request, "You have been logged out successfully.")
  55. return redirect('login')
  56. @login_required
  57. def master_config_view(request):
  58. if request.method == 'POST':
  59. action = request.POST.get('action')
  60. # Part 1: Add New Attribute
  61. if action == 'add_attribute':
  62. name = request.POST.get('attr_name')
  63. is_m = request.POST.get('is_mandatory') == 'on'
  64. if name:
  65. AttributeMaster.objects.get_or_create(name=name.strip(), defaults={'is_mandatory': is_m})
  66. # Part 2: Add New Title Mapping (Product Type)
  67. # --- MAPPING ACTIONS (CREATE & UPDATE) ---
  68. elif action in ['add_mapping', 'update_mapping']:
  69. pt = request.POST.get('pt_name')
  70. seq = request.POST.get('sequence')
  71. edit_id = request.POST.get('edit_id')
  72. if action == 'update_mapping' and edit_id:
  73. # Update existing
  74. mapping = get_object_or_404(TitleMapping, id=edit_id)
  75. mapping.product_type = pt.strip()
  76. mapping.format_sequence = seq
  77. mapping.save()
  78. else:
  79. # Create new (using get_or_create to prevent exact duplicates)
  80. if pt:
  81. TitleMapping.objects.get_or_create(
  82. product_type=pt.strip(),
  83. defaults={'format_sequence': seq}
  84. )
  85. # --- MAPPING DELETE ---
  86. elif action == 'delete_mapping':
  87. mapping_id = request.POST.get('id')
  88. TitleMapping.objects.filter(id=mapping_id).delete()
  89. # Part 3: Delete functionality
  90. elif action == 'delete_attribute':
  91. AttributeMaster.objects.filter(id=request.POST.get('id')).delete()
  92. return redirect('title_creator_master')
  93. # GET: Load all data
  94. context = {
  95. 'attributes': AttributeMaster.objects.all().order_by('name'),
  96. 'mappings': TitleMapping.objects.all().order_by('product_type'),
  97. }
  98. return render(request, 'title_creator_master.html', context)
  99. def save_config_api(request):
  100. if request.method == 'POST':
  101. try:
  102. data = json.loads(request.body)
  103. # Update Mandatory Attributes
  104. # Expected data: { "mandatory_ids": [1, 3, 5] }
  105. AttributeMaster.objects.all().update(is_mandatory=False)
  106. AttributeMaster.objects.filter(id__in=data.get('mandatory_ids', [])).update(is_mandatory=True)
  107. # Update Title Sequences
  108. # Expected data: { "mappings": [{"id": 1, "sequence": "Brand,Color"}] }
  109. for m in data.get('mappings', []):
  110. TitleMapping.objects.filter(id=m['id']).update(format_sequence=m['sequence'])
  111. return JsonResponse({'success': True})
  112. except Exception as e:
  113. return JsonResponse({'success': False, 'error': str(e)})
  114. def extract_title_or_error(product,selected_pt):
  115. # 1. Identify Product Type from JSON to fetch the correct Mapping
  116. pt_name = selected_pt
  117. try:
  118. mapping = TitleMapping.objects.get(product_type=pt_name)
  119. config_sequence = mapping.get_sequence_list()
  120. except TitleMapping.DoesNotExist:
  121. return f"No Title Configuration found for Product Type: {pt_name}"
  122. # 2. Get Mandatory list from DB
  123. mandatory_fields = list(AttributeMaster.objects.filter(is_mandatory=True).values_list('name', flat=True))
  124. # 3. Data Extraction (Your logic)
  125. extracted_data = {
  126. "Brand": product.get("brand"),
  127. "Product Type": pt_name
  128. }
  129. dimensions = {}
  130. for group in product.get("attributeGroups", []):
  131. for attr in group.get("attributes", []):
  132. desc = attr.get("attributeDesc")
  133. value = attr.get("attributeValue")
  134. if desc in ["Door Type", "Capacity", "Color"]:
  135. extracted_data[desc] = value
  136. elif desc in ["Width", "Depth", "Height"]:
  137. dimensions[desc] = value
  138. if {"Width", "Depth", "Height"}.issubset(dimensions):
  139. extracted_data["Dimensions"] = f'{dimensions["Width"]} x {dimensions["Depth"]} x {dimensions["Height"]}'
  140. # 4. Build Title and Check Mandatory Rules from DB
  141. final_title_parts = []
  142. missing_mandatory = []
  143. for attr_name in config_sequence:
  144. val = extracted_data.get(attr_name)
  145. if not val or str(val).strip() == "":
  146. # If DB says it's mandatory, track the error
  147. if attr_name in mandatory_fields:
  148. missing_mandatory.append(attr_name)
  149. continue
  150. final_title_parts.append(str(val))
  151. # 5. Result
  152. if missing_mandatory:
  153. return f"Could not found {', '.join(missing_mandatory)} on Product Details page"
  154. return " ".join(final_title_parts)
  155. def construct_dynamic_title(raw_data,selected_pt):
  156. try:
  157. product = raw_data.get("props", {}).get("pageProps", {}).get("product", {})
  158. if not product: return "Product data not found"
  159. return extract_title_or_error(product,selected_pt).strip()
  160. except Exception:
  161. return "Could not found attribute name on product details page"
  162. @login_required
  163. def title_creator_view(request):
  164. if request.method == 'POST' and request.FILES.get('file'):
  165. excel_file = request.FILES['file']
  166. selected_pt = request.POST.get('product_type')
  167. fs = FileSystemStorage()
  168. filename = fs.save(excel_file.name, excel_file)
  169. file_path = fs.path(filename)
  170. try:
  171. # 1. Read Excel
  172. df = pd.read_excel(file_path)
  173. # 2. Add the NEW COLUMN if it doesn't exist
  174. if 'New_Generated_Title' not in df.columns:
  175. df['New_Generated_Title'] = ""
  176. headers = {"User-Agent": "Mozilla/5.0"}
  177. results_for_ui = []
  178. # 3. Process each row
  179. for index, row in df.iterrows():
  180. url = row.get('URL') # Assumes your excel has a 'URL' column
  181. new_title = ""
  182. if pd.notna(url):
  183. try:
  184. resp = requests.get(url, headers=headers, timeout=10)
  185. soup = BeautifulSoup(resp.content, 'html.parser')
  186. script_tag = soup.find('script', id='__NEXT_DATA__')
  187. if script_tag:
  188. raw_data = json.loads(script_tag.string)
  189. new_title = construct_dynamic_title(raw_data,selected_pt)
  190. else:
  191. new_title = "Could not found attribute name on product details page"
  192. except:
  193. new_title = "Could not found attribute name on product details page"
  194. else:
  195. new_title = "URL Missing"
  196. # Update the DataFrame column for this row
  197. df.at[index, 'New_Generated_Title'] = new_title
  198. results_for_ui.append({
  199. "id" : index + 1,
  200. "url": url,
  201. "new_title": new_title,
  202. "status": True
  203. })
  204. time.sleep(1) # Safety delay
  205. # 4. Save the modified Excel to a new path
  206. output_filename = f"processed_{excel_file.name}"
  207. output_path = os.path.join(fs.location, output_filename)
  208. df.to_excel(output_path, index=False)
  209. return JsonResponse({
  210. 'success': True,
  211. 'results': results_for_ui,
  212. 'download_url': fs.url(output_filename)
  213. })
  214. finally:
  215. if os.path.exists(file_path): os.remove(file_path)
  216. # GET request: Fetch all product types for the dropdown
  217. product_types = TitleMapping.objects.all().values_list('product_type', flat=True)
  218. return render(request, 'title_creator_index.html', {'product_types': product_types})
  219. # return render(request, 'title_creator_index.html')
  220. # def process_excel_task(file_path, selected_pt, task_id):
  221. # task = ProcessingTask.objects.get(task_id=task_id)
  222. # try:
  223. # df = pd.read_excel(file_path)
  224. # if 'New_Generated_Title' not in df.columns:
  225. # df['New_Generated_Title'] = ""
  226. # headers = {"User-Agent": "Mozilla/5.0"}
  227. # for index, row in df.iterrows():
  228. # url = row.get('URL')
  229. # # ... [Insert your existing BeautifulSoup/Scraping Logic Here] ...
  230. # # Example:
  231. # # new_title = construct_dynamic_title(raw_data, selected_pt)
  232. # # df.at[index, 'New_Generated_Title'] = new_title
  233. # time.sleep(1)
  234. # # Save Final File
  235. # output_filename = f"completed_{task.original_filename}"
  236. # from django.conf import settings
  237. # output_path = os.path.join(settings.MEDIA_ROOT, output_filename)
  238. # df.to_excel(output_path, index=False)
  239. # # Update Task Status
  240. # task.status = 'COMPLETED'
  241. # task.download_url = f"{settings.MEDIA_URL}{output_filename}"
  242. # task.save()
  243. # except Exception as e:
  244. # task.status = 'FAILED'
  245. # task.save()
  246. # print(f"Error: {e}")
  247. # finally:
  248. # if os.path.exists(file_path):
  249. # os.remove(file_path)
  250. def process_excel_task(file_path, selected_pt, task_id):
  251. # Retrieve the task record from the database
  252. scraper = cloudscraper.create_scraper() # This replaces requests.get
  253. task = ProcessingTask.objects.get(task_id=task_id)
  254. try:
  255. # 1. Read Excel
  256. df = pd.read_excel(file_path)
  257. # 2. Add the NEW COLUMN if it doesn't exist
  258. if 'New_Generated_Title' not in df.columns:
  259. df['New_Generated_Title'] = ""
  260. headers = {"User-Agent": "Mozilla/5.0"}
  261. # 3. Process each row
  262. for index, row in df.iterrows():
  263. url = row.get('URL')
  264. new_title = ""
  265. if pd.notna(url):
  266. try:
  267. # Scraping logic
  268. # resp = scraper.get(url, timeout=15)
  269. resp = requests.get(url, headers=headers, timeout=10)
  270. if resp.status_code == 200:
  271. soup = BeautifulSoup(resp.content, 'html.parser')
  272. script_tag = soup.find('script', id='__NEXT_DATA__')
  273. if script_tag:
  274. try:
  275. raw_data = json.loads(script_tag.string)
  276. # Calling your dynamic title helper
  277. new_title = construct_dynamic_title(raw_data, selected_pt)
  278. except Exception:
  279. new_title = "Data Parsing Error"
  280. else:
  281. new_title = "Could not found attribute name on product details page"
  282. else:
  283. new_title = f"HTTP Error: {resp.status_code}"
  284. except Exception:
  285. new_title = "Request Failed (Timeout/Connection)"
  286. else:
  287. new_title = "URL Missing"
  288. # Update the DataFrame
  289. df.at[index, 'New_Generated_Title'] = new_title
  290. # Optional: Sleep to prevent getting blocked by the server
  291. time.sleep(1)
  292. # 4. Save the modified Excel to the MEDIA folder
  293. output_filename = f"completed_{task_id}_{task.original_filename}"
  294. # Ensure media directory exists
  295. if not os.path.exists(settings.MEDIA_ROOT):
  296. os.makedirs(settings.MEDIA_ROOT)
  297. output_path = os.path.join(settings.MEDIA_ROOT, output_filename)
  298. df.to_excel(output_path, index=False)
  299. # 5. Final Status Update
  300. task.status = 'COMPLETED'
  301. # Construct the URL for the frontend to download
  302. task.download_url = f"{settings.MEDIA_URL}{output_filename}"
  303. task.save()
  304. except Exception as e:
  305. print(f"Critical Task Failure: {e}")
  306. task.status = 'FAILED'
  307. task.save()
  308. finally:
  309. # 6. Cleanup the temporary uploaded file
  310. if os.path.exists(file_path):
  311. os.remove(file_path)
  312. @login_required
  313. def title_creator_async_view(request):
  314. if request.method == 'POST' and request.FILES.get('file'):
  315. excel_file = request.FILES['file']
  316. selected_pt = request.POST.get('product_type')
  317. # 1. Save file temporarily
  318. fs = FileSystemStorage()
  319. filename = fs.save(f"temp_{uuid.uuid4().hex}_{excel_file.name}", excel_file)
  320. file_path = fs.path(filename)
  321. # 2. Create Task Record
  322. task_id = str(uuid.uuid4())
  323. ProcessingTask.objects.create(
  324. task_id=task_id,
  325. original_filename=excel_file.name,
  326. status='PENDING'
  327. )
  328. # 3. Start Background Thread
  329. thread = threading.Thread(
  330. target=process_excel_task,
  331. args=(file_path, selected_pt, task_id)
  332. )
  333. thread.start()
  334. return JsonResponse({
  335. 'status': 'started',
  336. 'task_id': task_id,
  337. 'message': 'File is processing in the background.'
  338. })
  339. return JsonResponse({'error': 'Invalid request'}, status=400)
  340. # 2. This view is called repeatedly by pollStatus() in your JS
  341. def check_status(request, task_id):
  342. # Look up the task in the database
  343. task = get_object_or_404(ProcessingTask, task_id=task_id)
  344. return JsonResponse({
  345. 'status': task.status, # 'PENDING', 'COMPLETED', or 'FAILED'
  346. 'file_name': task.original_filename,
  347. 'download_url': task.download_url # This will be null until status is COMPLETED
  348. })