# views.py
  1. import os
  2. import json
  3. import time
  4. import requests
  5. import uuid
  6. import threading
  7. import pandas as pd
  8. from bs4 import BeautifulSoup
  9. from django.shortcuts import get_object_or_404, redirect, render
  10. from django.core.files.storage import FileSystemStorage
  11. from django.http import JsonResponse
  12. from .models import TitleMapping, AttributeMaster,ProcessingTask # <--- THIS FIXES THE ERROR
  13. from django.conf import settings
  14. import cloudscraper
  15. from django.contrib import messages
  16. from django.contrib.auth import authenticate, login, logout
  17. # from django.contrib.auth.decorators import login_required
  18. from .decorators import login_required
  19. from django.contrib.auth.hashers import make_password
  20. import random
  21. # To login
  22. def login_view(request):
  23. if request.method == "POST":
  24. email = request.POST.get("username")
  25. password = request.POST.get("password")
  26. print("Email: ", email)
  27. print("Password: ", password)
  28. # Authenticate the user
  29. user = authenticate(request, username=email, password=password)
  30. print("user",user)
  31. if user is not None:
  32. print("User authenticated successfully.")
  33. login(request, user)
  34. request.session['user_email'] = user.email
  35. # request.session = user
  36. # request.session['full_name'] = f"{user.firstName} {user.lastName or ''}".strip()
  37. # # Store both human-readable role and code
  38. # request.session['role'] = user.get_role_display() # 'Super Admin', 'Admin', 'RTA'
  39. # request.session['role_code'] = user.role # '0', '1', '2'
  40. # request.session['joining_date'] = user.createdDate.strftime("%b, %Y")
  41. # request.session['userId'] = user.userId
  42. # 📌 Store client_id if user has a client associated
  43. # request.session['client_id'] = user.client.clientId if user.client else None
  44. return redirect('title_creator_home')
  45. else:
  46. print("Invalid credentials.")
  47. messages.error(request, "Invalid email or password.")
  48. return redirect('login')
  49. print("Rendering login page.")
  50. return render(request, 'login.html')
# To logout
@login_required
def logout_view(request):
    """Log the current user out and redirect to the login page.

    Note: the success message is added after logout(); Django's logout flushes
    the session, so ordering matters for the messages framework.
    """
    logout(request)
    messages.success(request, "You have been logged out successfully.")
    return redirect('login')
  57. @login_required
  58. def master_config_view(request):
  59. if request.method == 'POST':
  60. action = request.POST.get('action')
  61. # Part 1: Add New Attribute
  62. if action == 'add_attribute':
  63. name = request.POST.get('attr_name')
  64. is_m = request.POST.get('is_mandatory') == 'on'
  65. if name:
  66. AttributeMaster.objects.get_or_create(name=name.strip(), defaults={'is_mandatory': is_m})
  67. # Part 2: Add New Title Mapping (Product Type)
  68. # --- MAPPING ACTIONS (CREATE & UPDATE) ---
  69. elif action in ['add_mapping', 'update_mapping']:
  70. pt = request.POST.get('pt_name')
  71. seq = request.POST.get('sequence')
  72. edit_id = request.POST.get('edit_id')
  73. if action == 'update_mapping' and edit_id:
  74. # Update existing
  75. mapping = get_object_or_404(TitleMapping, id=edit_id)
  76. mapping.product_type = pt.strip()
  77. mapping.format_sequence = seq
  78. mapping.save()
  79. else:
  80. # Create new (using get_or_create to prevent exact duplicates)
  81. if pt:
  82. TitleMapping.objects.get_or_create(
  83. product_type=pt.strip(),
  84. defaults={'format_sequence': seq}
  85. )
  86. # --- MAPPING DELETE ---
  87. elif action == 'delete_mapping':
  88. mapping_id = request.POST.get('id')
  89. TitleMapping.objects.filter(id=mapping_id).delete()
  90. # Part 3: Delete functionality
  91. elif action == 'delete_attribute':
  92. AttributeMaster.objects.filter(id=request.POST.get('id')).delete()
  93. return redirect('title_creator_master')
  94. # GET: Load all data
  95. context = {
  96. 'attributes': AttributeMaster.objects.all().order_by('name'),
  97. 'mappings': TitleMapping.objects.all().order_by('product_type'),
  98. }
  99. return render(request, 'title_creator_master.html', context)
  100. def save_config_api(request):
  101. if request.method == 'POST':
  102. try:
  103. data = json.loads(request.body)
  104. # Update Mandatory Attributes
  105. # Expected data: { "mandatory_ids": [1, 3, 5] }
  106. AttributeMaster.objects.all().update(is_mandatory=False)
  107. AttributeMaster.objects.filter(id__in=data.get('mandatory_ids', [])).update(is_mandatory=True)
  108. # Update Title Sequences
  109. # Expected data: { "mappings": [{"id": 1, "sequence": "Brand,Color"}] }
  110. for m in data.get('mappings', []):
  111. TitleMapping.objects.filter(id=m['id']).update(format_sequence=m['sequence'])
  112. return JsonResponse({'success': True})
  113. except Exception as e:
  114. return JsonResponse({'success': False, 'error': str(e)})
  115. # def extract_title_or_error(product,selected_pt):
  116. # # 1. Identify Product Type from JSON to fetch the correct Mapping
  117. # pt_name = selected_pt
  118. # try:
  119. # mapping = TitleMapping.objects.get(product_type=pt_name)
  120. # config_sequence = mapping.get_sequence_list()
  121. # except TitleMapping.DoesNotExist:
  122. # return f"No Title Configuration found for Product Type: {pt_name}"
  123. # # 2. Get Mandatory list from DB
  124. # mandatory_fields = list(AttributeMaster.objects.filter(is_mandatory=True).values_list('name', flat=True))
  125. # # 3. Data Extraction (Your logic)
  126. # extracted_data = {
  127. # "Brand": product.get("brand"),
  128. # "Product Type": pt_name
  129. # }
  130. # dimensions = {}
  131. # for group in product.get("attributeGroups", []):
  132. # for attr in group.get("attributes", []):
  133. # desc = attr.get("attributeDesc")
  134. # value = attr.get("attributeValue")
  135. # if desc == "Capacity":
  136. # extracted_data[desc] = f"Capacity {value}"
  137. # if desc in ["Door Type", "Capacity", "Color"]:
  138. # extracted_data[desc] = value
  139. # elif desc in ["Width", "Depth", "Height"]:
  140. # dimensions[desc] = value
  141. # if {"Width", "Depth", "Height"}.issubset(dimensions):
  142. # # extracted_data["Dimensions"] = f'{dimensions["Width"]} x {dimensions["Depth"]} x {dimensions["Height"]}'
  143. # w, d, h = dimensions["Width"], dimensions["Depth"], dimensions["Height"]
  144. # extracted_data["Dimensions"] = f'{w}"w x {d}"d x {h}"h'
  145. # # 4. Build Title and Check Mandatory Rules from DB
  146. # final_title_parts = []
  147. # missing_mandatory = []
  148. # for attr_name in config_sequence:
  149. # val = extracted_data.get(attr_name)
  150. # if not val or str(val).strip() == "":
  151. # # If DB says it's mandatory, track the error
  152. # if attr_name in mandatory_fields:
  153. # missing_mandatory.append(attr_name)
  154. # continue
  155. # final_title_parts.append(str(val))
  156. # # 5. Result
  157. # if missing_mandatory:
  158. # return f"Could not found {', '.join(missing_mandatory)} on Product Details page"
  159. # return " ".join(final_title_parts)
  160. def extract_title_or_error(product, selected_pt):
  161. # 1. Identify Product Type
  162. pt_name = selected_pt
  163. try:
  164. mapping = TitleMapping.objects.get(product_type=pt_name)
  165. config_sequence = mapping.get_sequence_list()
  166. except TitleMapping.DoesNotExist:
  167. return f"No Title Configuration found for Product Type: {pt_name}"
  168. mandatory_fields = list(AttributeMaster.objects.filter(is_mandatory=True).values_list('name', flat=True))
  169. # 2. Data Extraction
  170. extracted_data = {
  171. "Brand": product.get("brand")+"©",
  172. "Product Type": pt_name
  173. }
  174. dimensions = {}
  175. for group in product.get("attributeGroups", []):
  176. for attr in group.get("attributes", []):
  177. desc = attr.get("attributeDesc")
  178. val = attr.get("attributeValue")
  179. if desc == "Capacity":
  180. extracted_data[desc] = f"Capacity {val}"
  181. elif desc in ["Door Type", "Color"]:
  182. extracted_data[desc] = val
  183. elif desc in ["Width", "Depth", "Height"]:
  184. dimensions[desc] = val
  185. if {"Width", "Depth", "Height"}.issubset(dimensions):
  186. w, d, h = dimensions["Width"], dimensions["Depth"], dimensions["Height"]
  187. # We use .replace(" in", "") to remove the existing unit before adding the " symbol
  188. w = dimensions["Width"].replace(" in", "").strip()
  189. d = dimensions["Depth"].replace(" in", "").strip()
  190. h = dimensions["Height"].replace(" in", "").strip()
  191. extracted_data["Dimensions"] = f'{w}"W x {d}"D x {h}"H'
  192. # 3. Build Title Parts
  193. final_title_parts = []
  194. missing_mandatory = []
  195. for attr_name in config_sequence:
  196. val = extracted_data.get(attr_name)
  197. if not val or str(val).strip() == "":
  198. if attr_name in mandatory_fields:
  199. missing_mandatory.append(attr_name)
  200. continue
  201. final_title_parts.append(str(val))
  202. if missing_mandatory:
  203. return f"Could not found {', '.join(missing_mandatory)} on Product Details page"
  204. # Helper function to join parts: Brand PT, Param1, Param2
  205. def construct_string(parts):
  206. if len(parts) <= 2:
  207. return " ".join(parts)
  208. return f"{parts[0]} {parts[1]}, {', '.join(parts[2:])}"
  209. current_title = construct_string(final_title_parts)
  210. # 4. Length Reduction Logic (Step-by-Step)
  211. print("Current Title 1 ########",current_title,len(current_title))
  212. # Step 1: Change "Capacity" -> "Cap."
  213. if len(current_title) > 100:
  214. for i, part in enumerate(final_title_parts):
  215. if "Capacity" in part:
  216. final_title_parts[i] = part.replace("Capacity", "Cap.")
  217. current_title = construct_string(final_title_parts)
  218. print("Current Title 2 ########",current_title,len(current_title))
  219. # Step 2: Shorten Product Type (e.g., Stainless Steel -> SS)
  220. # Step B: Dynamic Product Type Acronym
  221. if len(current_title) > 100:
  222. pt_part = final_title_parts[1]
  223. words = pt_part.split()
  224. if len(words) > 1:
  225. # Takes first letter of every word in the Product Type
  226. final_title_parts[1] = "".join([w[0].upper() for w in words])
  227. current_title = construct_string(final_title_parts)
  228. print("Current Title 3 ########",current_title,len(current_title))
  229. # Step 3: Remove spaces from attributes starting from the back
  230. # Brand (0) and Product Type (1) are skipped
  231. if len(current_title) > 100:
  232. for i in range(len(final_title_parts) - 1, 1, -1):
  233. if len(current_title) <= 100:
  234. break
  235. # Remove white spaces from the current attribute part
  236. final_title_parts[i] = final_title_parts[i].replace(" ", "")
  237. current_title = construct_string(final_title_parts)
  238. print("Current Title 4 ########",current_title,len(current_title))
  239. return current_title
  240. def construct_dynamic_title(raw_data,selected_pt):
  241. try:
  242. product = raw_data.get("props", {}).get("pageProps", {}).get("product", {})
  243. if not product: return "Product data not found"
  244. return extract_title_or_error(product,selected_pt).strip()
  245. except Exception:
  246. return "Could not found attribute name on product details page"
  247. @login_required
  248. def title_creator_view(request):
  249. if request.method == 'POST' and request.FILES.get('file'):
  250. excel_file = request.FILES['file']
  251. selected_pt = request.POST.get('product_type')
  252. fs = FileSystemStorage()
  253. filename = fs.save(excel_file.name, excel_file)
  254. file_path = fs.path(filename)
  255. try:
  256. # 1. Read Excel
  257. df = pd.read_excel(file_path)
  258. # 2. Add the NEW COLUMN if it doesn't exist
  259. if 'New_Generated_Title' not in df.columns:
  260. df['New_Generated_Title'] = ""
  261. headers = {"User-Agent": "Mozilla/5.0"}
  262. results_for_ui = []
  263. # Specific Headers for the Item# API
  264. api_headers = {
  265. "accept": "application/json, text/plain, */*",
  266. "authorization": "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA",
  267. "client_id": "GEC",
  268. "referer": "https://www.globalindustrial.com/"
  269. }
  270. # 3. Process each row
  271. for index, row in df.iterrows():
  272. url = row.get('URL') # Assumes your excel has a 'URL' column
  273. item_number = row.get('Item#')
  274. new_title = ""
  275. final_url = None
  276. # Step 1: Resolve the URL
  277. if pd.notna(url) and str(url).startswith('http'):
  278. final_url = url
  279. elif pd.notna(item_number):
  280. # Call API to get URL from Item#
  281. api_url = f"https://www.globalindustrial.com/catalogApis/catalog/autosuggest?key={item_number}&features=true"
  282. try:
  283. api_resp = requests.get(api_url, headers=api_headers, timeout=10)
  284. if api_resp.status_code == 200:
  285. data = api_resp.json()
  286. final_url = data.get('exactMatch', {}).get('canonicalLink')
  287. except Exception as e:
  288. new_title = f"API Error for Item# {item_number}"
  289. if pd.notna(final_url):
  290. try:
  291. resp = requests.get(final_url, headers=headers, timeout=10)
  292. soup = BeautifulSoup(resp.content, 'html.parser')
  293. script_tag = soup.find('script', id='__NEXT_DATA__')
  294. if script_tag:
  295. raw_data = json.loads(script_tag.string)
  296. new_title = construct_dynamic_title(raw_data,selected_pt)
  297. else:
  298. new_title = "Could not found attribute name on product details page"
  299. except:
  300. new_title = "Could not found attribute name on product details page"
  301. else:
  302. new_title = "URL Missing"
  303. # Update the DataFrame column for this row
  304. df.at[index, 'New_Generated_Title'] = new_title
  305. results_for_ui.append({
  306. "id" : index + 1,
  307. "url": final_url,
  308. "new_title": new_title,
  309. "status": True
  310. })
  311. # Generates a random float between 3.0 and 7.0
  312. time.sleep(random.uniform(3, 7))
  313. # time.sleep(1) # Safety delay
  314. # 4. Save the modified Excel to a new path
  315. output_filename = f"processed_{excel_file.name}"
  316. output_path = os.path.join(fs.location, output_filename)
  317. df.to_excel(output_path, index=False)
  318. return JsonResponse({
  319. 'success': True,
  320. 'results': results_for_ui,
  321. 'download_url': fs.url(output_filename)
  322. })
  323. finally:
  324. if os.path.exists(file_path): os.remove(file_path)
  325. # GET request: Fetch all product types for the dropdown
  326. product_types = TitleMapping.objects.all().values_list('product_type', flat=True)
  327. return render(request, 'title_creator_index.html', {'product_types': product_types})
  328. # return render(request, 'title_creator_index.html')
  329. def process_excel_task(file_path, selected_pt, task_id):
  330. # Retrieve the task record from the database
  331. scraper = cloudscraper.create_scraper() # This replaces requests.get
  332. task = ProcessingTask.objects.get(task_id=task_id)
  333. try:
  334. # 1. Read Excel
  335. df = pd.read_excel(file_path)
  336. # 2. Add the NEW COLUMN if it doesn't exist
  337. if 'New_Generated_Title' not in df.columns:
  338. df['New_Generated_Title'] = ""
  339. headers = {"User-Agent": "Mozilla/5.0"}
  340. # Specific Headers for the Item# API
  341. api_headers = {
  342. "accept": "application/json, text/plain, */*",
  343. "authorization": "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA",
  344. "client_id": "GEC",
  345. "referer": "https://www.globalindustrial.com/"
  346. }
  347. # 3. Process each row
  348. for index, row in df.iterrows():
  349. url = row.get('URL')
  350. new_title = ""
  351. item_number = row.get('Item#')
  352. final_url = None
  353. # Step 1: Resolve the URL
  354. if pd.notna(url) and str(url).startswith('http'):
  355. final_url = url
  356. elif pd.notna(item_number):
  357. # Call API to get URL from Item#
  358. api_url = f"https://www.globalindustrial.com/catalogApis/catalog/autosuggest?key={item_number}&features=true"
  359. try:
  360. api_resp = requests.get(api_url, headers=api_headers, timeout=10)
  361. if api_resp.status_code == 200:
  362. data = api_resp.json()
  363. final_url = data.get('exactMatch', {}).get('canonicalLink')
  364. except Exception as e:
  365. new_title = f"API Error for Item# {item_number}"
  366. if pd.notna(final_url):
  367. try:
  368. # Scraping logic
  369. # resp = scraper.get(url, timeout=15)
  370. resp = requests.get(final_url, headers=headers, timeout=10)
  371. if resp.status_code == 200:
  372. soup = BeautifulSoup(resp.content, 'html.parser')
  373. script_tag = soup.find('script', id='__NEXT_DATA__')
  374. if script_tag:
  375. try:
  376. raw_data = json.loads(script_tag.string)
  377. # Calling your dynamic title helper
  378. new_title = construct_dynamic_title(raw_data, selected_pt)
  379. except Exception:
  380. new_title = "Data Parsing Error"
  381. else:
  382. new_title = "Could not found attribute name on product details page"
  383. else:
  384. new_title = f"HTTP Error: {resp.status_code}"
  385. except Exception:
  386. new_title = "Request Failed (Timeout/Connection)"
  387. else:
  388. new_title = "URL Missing"
  389. # Update the DataFrame
  390. df.at[index, 'New_Generated_Title'] = new_title
  391. # Optional: Sleep to prevent getting blocked by the server
  392. # Generates a random float between 3.0 and 7.0
  393. time.sleep(random.uniform(3, 7))
  394. # time.sleep(1)
  395. # 4. Save the modified Excel to the MEDIA folder
  396. output_filename = f"completed_{task_id}_{task.original_filename}"
  397. # Ensure media directory exists
  398. if not os.path.exists(settings.MEDIA_ROOT):
  399. os.makedirs(settings.MEDIA_ROOT)
  400. output_path = os.path.join(settings.MEDIA_ROOT, output_filename)
  401. df.to_excel(output_path, index=False)
  402. # 5. Final Status Update
  403. task.status = 'COMPLETED'
  404. # Construct the URL for the frontend to download
  405. task.download_url = f"{settings.MEDIA_URL}{output_filename}"
  406. task.save()
  407. except Exception as e:
  408. print(f"Critical Task Failure: {e}")
  409. task.status = 'FAILED'
  410. task.save()
  411. finally:
  412. # 6. Cleanup the temporary uploaded file
  413. if os.path.exists(file_path):
  414. os.remove(file_path)
  415. @login_required
  416. def title_creator_async_view(request):
  417. if request.method == 'POST' and request.FILES.get('file'):
  418. excel_file = request.FILES['file']
  419. selected_pt = request.POST.get('product_type')
  420. # 1. Save file temporarily
  421. fs = FileSystemStorage()
  422. filename = fs.save(f"temp_{uuid.uuid4().hex}_{excel_file.name}", excel_file)
  423. file_path = fs.path(filename)
  424. # 2. Create Task Record
  425. task_id = str(uuid.uuid4())
  426. ProcessingTask.objects.create(
  427. task_id=task_id,
  428. original_filename=excel_file.name,
  429. status='PENDING'
  430. )
  431. # 3. Start Background Thread
  432. thread = threading.Thread(
  433. target=process_excel_task,
  434. args=(file_path, selected_pt, task_id)
  435. )
  436. thread.start()
  437. return JsonResponse({
  438. 'status': 'started',
  439. 'task_id': task_id,
  440. 'message': 'File is processing in the background.'
  441. })
  442. return JsonResponse({'error': 'Invalid request'}, status=400)
  443. # 2. This view is called repeatedly by pollStatus() in your JS
  444. def check_status(request, task_id):
  445. # Look up the task in the database
  446. task = get_object_or_404(ProcessingTask, task_id=task_id)
  447. return JsonResponse({
  448. 'status': task.status, # 'PENDING', 'COMPLETED', or 'FAILED'
  449. 'file_name': task.original_filename,
  450. 'download_url': task.download_url # This will be null until status is COMPLETED
  451. })
  452. @login_required
  453. def title_creator_history_page(request):
  454. # Renders the HTML page
  455. return render(request, 'title_creator_history.html')
  456. @login_required
  457. def get_title_creator_tasks_json(request):
  458. # Returns the list of tasks as JSON for the history table
  459. tasks = ProcessingTask.objects.all().order_by('-created_at')[:50] # Latest 50 tasks
  460. data = []
  461. for t in tasks:
  462. data.append({
  463. 'task_id': t.task_id,
  464. 'filename': t.original_filename or "Unknown File",
  465. 'status': t.status,
  466. 'url': t.download_url,
  467. 'date': t.created_at.strftime("%d %b %Y, %I:%M %p")
  468. })
  469. return JsonResponse(data, safe=False)