# views.py
  1. import os
  2. import json
  3. import time
  4. import requests
  5. import uuid
  6. import threading
  7. import pandas as pd
  8. from bs4 import BeautifulSoup
  9. from django.shortcuts import get_object_or_404, redirect, render
  10. from django.core.files.storage import FileSystemStorage
  11. from django.http import JsonResponse
  12. from .models import TitleMapping, AttributeMaster,ProcessingTask # <--- THIS FIXES THE ERROR
  13. from django.conf import settings
  14. import cloudscraper
  15. from django.contrib import messages
  16. from django.contrib.auth import authenticate, login, logout
  17. # from django.contrib.auth.decorators import login_required
  18. from .decorators import login_required
  19. from django.contrib.auth.hashers import make_password
  20. import random
  21. # To login
  22. def login_view(request):
  23. if request.method == "POST":
  24. email = request.POST.get("username")
  25. password = request.POST.get("password")
  26. print("Email: ", email)
  27. print("Password: ", password)
  28. # Authenticate the user
  29. user = authenticate(request, username=email, password=password)
  30. print("user",user)
  31. if user is not None:
  32. print("User authenticated successfully.")
  33. login(request, user)
  34. request.session['user_email'] = user.email
  35. # request.session = user
  36. # request.session['full_name'] = f"{user.firstName} {user.lastName or ''}".strip()
  37. # # Store both human-readable role and code
  38. # request.session['role'] = user.get_role_display() # 'Super Admin', 'Admin', 'RTA'
  39. # request.session['role_code'] = user.role # '0', '1', '2'
  40. # request.session['joining_date'] = user.createdDate.strftime("%b, %Y")
  41. # request.session['userId'] = user.userId
  42. # 📌 Store client_id if user has a client associated
  43. # request.session['client_id'] = user.client.clientId if user.client else None
  44. return redirect('title_creator_home')
  45. else:
  46. print("Invalid credentials.")
  47. messages.error(request, "Invalid email or password.")
  48. return redirect('login')
  49. print("Rendering login page.")
  50. return render(request, 'login.html')
  51. # To logout
  52. @login_required
  53. def logout_view(request):
  54. logout(request)
  55. messages.success(request, "You have been logged out successfully.")
  56. return redirect('login')
  57. @login_required
  58. def master_config_view(request):
  59. if request.method == 'POST':
  60. action = request.POST.get('action')
  61. # Part 1: Add New Attribute
  62. if action == 'add_attribute':
  63. name = request.POST.get('attr_name')
  64. is_m = request.POST.get('is_mandatory') == 'on'
  65. if name:
  66. AttributeMaster.objects.get_or_create(name=name.strip(), defaults={'is_mandatory': is_m})
  67. # Part 2: Add New Title Mapping (Product Type)
  68. # --- MAPPING ACTIONS (CREATE & UPDATE) ---
  69. elif action in ['add_mapping', 'update_mapping']:
  70. pt = request.POST.get('pt_name')
  71. seq = request.POST.get('sequence')
  72. edit_id = request.POST.get('edit_id')
  73. if action == 'update_mapping' and edit_id:
  74. # Update existing
  75. mapping = get_object_or_404(TitleMapping, id=edit_id)
  76. mapping.product_type = pt.strip()
  77. mapping.format_sequence = seq
  78. mapping.save()
  79. else:
  80. # Create new (using get_or_create to prevent exact duplicates)
  81. if pt:
  82. TitleMapping.objects.get_or_create(
  83. product_type=pt.strip(),
  84. defaults={'format_sequence': seq}
  85. )
  86. # --- MAPPING DELETE ---
  87. elif action == 'delete_mapping':
  88. mapping_id = request.POST.get('id')
  89. TitleMapping.objects.filter(id=mapping_id).delete()
  90. # Part 3: Delete functionality
  91. elif action == 'delete_attribute':
  92. AttributeMaster.objects.filter(id=request.POST.get('id')).delete()
  93. return redirect('title_creator_master')
  94. # GET: Load all data
  95. context = {
  96. 'attributes': AttributeMaster.objects.all().order_by('name'),
  97. 'mappings': TitleMapping.objects.all().order_by('product_type'),
  98. }
  99. return render(request, 'title_creator_master.html', context)
  100. def save_config_api(request):
  101. if request.method == 'POST':
  102. try:
  103. data = json.loads(request.body)
  104. # Update Mandatory Attributes
  105. # Expected data: { "mandatory_ids": [1, 3, 5] }
  106. AttributeMaster.objects.all().update(is_mandatory=False)
  107. AttributeMaster.objects.filter(id__in=data.get('mandatory_ids', [])).update(is_mandatory=True)
  108. # Update Title Sequences
  109. # Expected data: { "mappings": [{"id": 1, "sequence": "Brand,Color"}] }
  110. for m in data.get('mappings', []):
  111. TitleMapping.objects.filter(id=m['id']).update(format_sequence=m['sequence'])
  112. return JsonResponse({'success': True})
  113. except Exception as e:
  114. return JsonResponse({'success': False, 'error': str(e)})
  115. # def extract_title_or_error(product,selected_pt):
  116. # # 1. Identify Product Type from JSON to fetch the correct Mapping
  117. # pt_name = selected_pt
  118. # try:
  119. # mapping = TitleMapping.objects.get(product_type=pt_name)
  120. # config_sequence = mapping.get_sequence_list()
  121. # except TitleMapping.DoesNotExist:
  122. # return f"No Title Configuration found for Product Type: {pt_name}"
  123. # # 2. Get Mandatory list from DB
  124. # mandatory_fields = list(AttributeMaster.objects.filter(is_mandatory=True).values_list('name', flat=True))
  125. # # 3. Data Extraction (Your logic)
  126. # extracted_data = {
  127. # "Brand": product.get("brand"),
  128. # "Product Type": pt_name
  129. # }
  130. # dimensions = {}
  131. # for group in product.get("attributeGroups", []):
  132. # for attr in group.get("attributes", []):
  133. # desc = attr.get("attributeDesc")
  134. # value = attr.get("attributeValue")
  135. # if desc == "Capacity":
  136. # extracted_data[desc] = f"Capacity {value}"
  137. # if desc in ["Door Type", "Capacity", "Color"]:
  138. # extracted_data[desc] = value
  139. # elif desc in ["Width", "Depth", "Height"]:
  140. # dimensions[desc] = value
  141. # if {"Width", "Depth", "Height"}.issubset(dimensions):
  142. # # extracted_data["Dimensions"] = f'{dimensions["Width"]} x {dimensions["Depth"]} x {dimensions["Height"]}'
  143. # w, d, h = dimensions["Width"], dimensions["Depth"], dimensions["Height"]
  144. # extracted_data["Dimensions"] = f'{w}"w x {d}"d x {h}"h'
  145. # # 4. Build Title and Check Mandatory Rules from DB
  146. # final_title_parts = []
  147. # missing_mandatory = []
  148. # for attr_name in config_sequence:
  149. # val = extracted_data.get(attr_name)
  150. # if not val or str(val).strip() == "":
  151. # # If DB says it's mandatory, track the error
  152. # if attr_name in mandatory_fields:
  153. # missing_mandatory.append(attr_name)
  154. # continue
  155. # final_title_parts.append(str(val))
  156. # # 5. Result
  157. # if missing_mandatory:
  158. # return f"Could not found {', '.join(missing_mandatory)} on Product Details page"
  159. # return " ".join(final_title_parts)
  160. def extract_title_or_error(product, selected_pt):
  161. # 1. Identify Product Type
  162. pt_name = selected_pt
  163. try:
  164. mapping = TitleMapping.objects.get(product_type=pt_name)
  165. config_sequence = mapping.get_sequence_list()
  166. except TitleMapping.DoesNotExist:
  167. return f"No Title Configuration found for Product Type: {pt_name}"
  168. mandatory_fields = list(AttributeMaster.objects.filter(is_mandatory=True).values_list('name', flat=True))
  169. # 2. Data Extraction
  170. extracted_data = {
  171. "Brand": product.get("brand")+"©",
  172. "Product Type": pt_name
  173. }
  174. dimensions = {}
  175. for group in product.get("attributeGroups", []):
  176. for attr in group.get("attributes", []):
  177. desc = attr.get("attributeDesc")
  178. val = attr.get("attributeValue")
  179. if desc == "Capacity":
  180. extracted_data[desc] = f"Capacity {val}"
  181. elif desc in ["Door Type", "Color"]:
  182. extracted_data[desc] = val
  183. elif desc in ["Width", "Depth", "Height"]:
  184. dimensions[desc] = val
  185. if {"Width", "Depth", "Height"}.issubset(dimensions):
  186. w, d, h = dimensions["Width"], dimensions["Depth"], dimensions["Height"]
  187. # We use .replace(" in", "") to remove the existing unit before adding the " symbol
  188. w = dimensions["Width"].replace(" in", "").strip()
  189. d = dimensions["Depth"].replace(" in", "").strip()
  190. h = dimensions["Height"].replace(" in", "").strip()
  191. extracted_data["Dimensions"] = f'{w}"W x {d}"D x {h}"H'
  192. # 3. Build Title Parts
  193. final_title_parts = []
  194. missing_mandatory = []
  195. for attr_name in config_sequence:
  196. val = extracted_data.get(attr_name)
  197. if not val or str(val).strip() == "":
  198. if attr_name in mandatory_fields:
  199. missing_mandatory.append(attr_name)
  200. continue
  201. final_title_parts.append(str(val))
  202. if missing_mandatory:
  203. return f"Could not found {', '.join(missing_mandatory)} on Product Details page"
  204. # Helper function to join parts: Brand PT, Param1, Param2
  205. def construct_string(parts):
  206. if len(parts) <= 2:
  207. return " ".join(parts)
  208. return f"{parts[0]} {parts[1]}, {', '.join(parts[2:])}"
  209. current_title = construct_string(final_title_parts)
  210. # 4. Length Reduction Logic (Step-by-Step)
  211. print("Current Title 1 ########",current_title,len(current_title))
  212. # Step 1: Change "Capacity" -> "Cap."
  213. if len(current_title) > 100:
  214. for i, part in enumerate(final_title_parts):
  215. if "Capacity" in part:
  216. final_title_parts[i] = part.replace("Capacity", "Cap.")
  217. current_title = construct_string(final_title_parts)
  218. print("Current Title 2 ########",current_title,len(current_title))
  219. # Step 2: Shorten Product Type (e.g., Stainless Steel -> SS)
  220. # Step B: Dynamic Product Type Acronym
  221. if len(current_title) > 100:
  222. pt_part = final_title_parts[1]
  223. words = pt_part.split()
  224. if len(words) > 1:
  225. # Takes first letter of every word in the Product Type
  226. final_title_parts[1] = "".join([w[0].upper() for w in words])
  227. current_title = construct_string(final_title_parts)
  228. print("Current Title 3 ########",current_title,len(current_title))
  229. # Step 3: Remove spaces from attributes starting from the back
  230. # Brand (0) and Product Type (1) are skipped
  231. if len(current_title) > 100:
  232. for i in range(len(final_title_parts) - 1, 1, -1):
  233. if len(current_title) <= 100:
  234. break
  235. # Remove white spaces from the current attribute part
  236. final_title_parts[i] = final_title_parts[i].replace(" ", "")
  237. current_title = construct_string(final_title_parts)
  238. print("Current Title 4 ########",current_title,len(current_title))
  239. return current_title
  240. def construct_dynamic_title(raw_data,selected_pt):
  241. try:
  242. product = raw_data.get("props", {}).get("pageProps", {}).get("product", {})
  243. if not product: return "Product data not found"
  244. return extract_title_or_error(product,selected_pt).strip()
  245. except Exception:
  246. return "Could not found attribute name on product details page"
  247. @login_required
  248. def title_creator_view(request):
  249. if request.method == 'POST' and request.FILES.get('file'):
  250. scraper = cloudscraper.create_scraper()
  251. excel_file = request.FILES['file']
  252. selected_pt = request.POST.get('product_type')
  253. fs = FileSystemStorage()
  254. filename = fs.save(excel_file.name, excel_file)
  255. file_path = fs.path(filename)
  256. try:
  257. # 1. Read Excel
  258. df = pd.read_excel(file_path)
  259. # 2. Add the NEW COLUMN if it doesn't exist
  260. if 'New_Generated_Title' not in df.columns:
  261. df['New_Generated_Title'] = ""
  262. headers = {"User-Agent": "Mozilla/5.0"}
  263. results_for_ui = []
  264. # 3. Process each row
  265. for index, row in df.iterrows():
  266. url = row.get('URL') # Assumes your excel has a 'URL' column
  267. item_number = row.get('Item#')
  268. new_title = ""
  269. final_url = None
  270. # Step 1: Resolve the URL
  271. if pd.notna(url) and str(url).startswith('http'):
  272. final_url = url
  273. elif pd.notna(item_number):
  274. # Specific Headers for the Item# API
  275. dynamic_token = get_fresh_token(scraper)
  276. api_headers = {
  277. "accept": "application/json, text/plain, */*",
  278. "authorization": f"Bearer {dynamic_token}",
  279. # "authorization": "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA",
  280. "client_id": "GEC",
  281. "referer": "https://www.globalindustrial.com/"
  282. }
  283. # Call API to get URL from Item#
  284. api_url = f"https://www.globalindustrial.com/catalogApis/catalog/autosuggest?key={item_number}&features=true"
  285. try:
  286. api_resp = requests.get(api_url, headers=api_headers, timeout=10)
  287. if api_resp.status_code == 200:
  288. data = api_resp.json()
  289. final_url = data.get('exactMatch', {}).get('canonicalLink')
  290. except Exception as e:
  291. new_title = f"API Error for Item# {item_number}"
  292. if pd.notna(final_url):
  293. try:
  294. resp = requests.get(final_url, headers=headers, timeout=10)
  295. soup = BeautifulSoup(resp.content, 'html.parser')
  296. script_tag = soup.find('script', id='__NEXT_DATA__')
  297. if script_tag:
  298. raw_data = json.loads(script_tag.string)
  299. new_title = construct_dynamic_title(raw_data,selected_pt)
  300. else:
  301. new_title = "Could not found attribute name on product details page"
  302. except:
  303. new_title = "Could not found attribute name on product details page"
  304. else:
  305. new_title = "URL Missing"
  306. # Update the DataFrame column for this row
  307. df.at[index, 'New_Generated_Title'] = new_title
  308. results_for_ui.append({
  309. "id" : index + 1,
  310. "url": final_url,
  311. "new_title": new_title,
  312. "status": True
  313. })
  314. # Generates a random float between 3.0 and 7.0
  315. time.sleep(random.uniform(3, 7))
  316. # time.sleep(1) # Safety delay
  317. # 4. Save the modified Excel to a new path
  318. output_filename = f"processed_{excel_file.name}"
  319. output_path = os.path.join(fs.location, output_filename)
  320. df.to_excel(output_path, index=False)
  321. return JsonResponse({
  322. 'success': True,
  323. 'results': results_for_ui,
  324. 'download_url': fs.url(output_filename)
  325. })
  326. finally:
  327. if os.path.exists(file_path): os.remove(file_path)
  328. # GET request: Fetch all product types for the dropdown
  329. product_types = TitleMapping.objects.all().values_list('product_type', flat=True)
  330. return render(request, 'title_creator_index.html', {'product_types': product_types})
  331. # return render(request, 'title_creator_index.html')
  332. async def get_fresh_token(scraper):
  333. """Hits the homepage once to extract the latest Bearer token."""
  334. base_url = "https://www.globalindustrial.com"
  335. try:
  336. # One-time hit to the base URL
  337. response = scraper.get(base_url, timeout=15)
  338. # 1. Check Cookies for 'Authorization'
  339. token = scraper.cookies.get('Authorization')
  340. if token:
  341. return token.replace('Bearer ', '').strip()
  342. # 2. Check __NEXT_DATA__ script in HTML
  343. soup = BeautifulSoup(response.content, 'html.parser')
  344. script_tag = soup.find('script', id='__NEXT_DATA__')
  345. if script_tag:
  346. data = json.loads(script_tag.string)
  347. # Standard Next.js path for auth tokens
  348. token = data.get('props', {}).get('pageProps', {}).get('token')
  349. if token:
  350. return token
  351. except Exception as e:
  352. print(f"Token retrieval failed: {e}")
  353. # Fallback to your hardcoded token if extraction fails
  354. return "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA"
  355. async def process_excel_task(file_path, selected_pt, task_id):
  356. # Retrieve the task record from the database
  357. scraper = cloudscraper.create_scraper() # This replaces requests.get
  358. task = ProcessingTask.objects.get(task_id=task_id)
  359. try:
  360. # 1. Read Excel
  361. df = pd.read_excel(file_path)
  362. # 2. Add the NEW COLUMN if it doesn't exist
  363. if 'New_Generated_Title' not in df.columns:
  364. df['New_Generated_Title'] = ""
  365. headers = {"User-Agent": "Mozilla/5.0"}
  366. # 3. Process each row
  367. for index, row in df.iterrows():
  368. url = row.get('URL')
  369. new_title = ""
  370. item_number = row.get('Item#')
  371. final_url = None
  372. # Step 1: Resolve the URL
  373. if pd.notna(url) and str(url).startswith('http'):
  374. final_url = url
  375. elif pd.notna(item_number):
  376. dynamic_token = await get_fresh_token(scraper)
  377. # Specific Headers for the Item# API
  378. api_headers = {
  379. "accept": "application/json, text/plain, */*",
  380. "authorization": f"Bearer {dynamic_token}",#"Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA",
  381. "client_id": "GEC",
  382. "referer": "https://www.globalindustrial.com/"
  383. }
  384. # Call API to get URL from Item#
  385. api_url = f"https://www.globalindustrial.com/catalogApis/catalog/autosuggest?key={item_number}&features=true"
  386. try:
  387. api_resp = requests.get(api_url, headers=api_headers, timeout=10)
  388. if api_resp.status_code == 200:
  389. data = api_resp.json()
  390. final_url = data.get('exactMatch', {}).get('canonicalLink')
  391. except Exception as e:
  392. new_title = f"API Error for Item# {item_number}"
  393. if pd.notna(final_url):
  394. try:
  395. # Scraping logic
  396. # resp = scraper.get(url, timeout=15)
  397. resp = requests.get(final_url, headers=headers, timeout=10)
  398. if resp.status_code == 200:
  399. soup = BeautifulSoup(resp.content, 'html.parser')
  400. script_tag = soup.find('script', id='__NEXT_DATA__')
  401. if script_tag:
  402. try:
  403. raw_data = json.loads(script_tag.string)
  404. # Calling your dynamic title helper
  405. new_title = construct_dynamic_title(raw_data, selected_pt)
  406. except Exception:
  407. new_title = "Data Parsing Error"
  408. else:
  409. new_title = "Could not found attribute name on product details page"
  410. else:
  411. new_title = f"HTTP Error: {resp.status_code}"
  412. except Exception:
  413. new_title = "Request Failed (Timeout/Connection)"
  414. else:
  415. new_title = "URL Missing"
  416. # Update the DataFrame
  417. df.at[index, 'New_Generated_Title'] = new_title
  418. # Optional: Sleep to prevent getting blocked by the server
  419. # Generates a random float between 3.0 and 7.0
  420. time.sleep(random.uniform(3, 7))
  421. # time.sleep(1)
  422. # 4. Save the modified Excel to the MEDIA folder
  423. output_filename = f"completed_{task_id}_{task.original_filename}"
  424. # Ensure media directory exists
  425. if not os.path.exists(settings.MEDIA_ROOT):
  426. os.makedirs(settings.MEDIA_ROOT)
  427. output_path = os.path.join(settings.MEDIA_ROOT, output_filename)
  428. df.to_excel(output_path, index=False)
  429. # 5. Final Status Update
  430. task.status = 'COMPLETED'
  431. # Construct the URL for the frontend to download
  432. task.download_url = f"{settings.MEDIA_URL}{output_filename}"
  433. task.save()
  434. except Exception as e:
  435. print(f"Critical Task Failure: {e}")
  436. task.status = 'FAILED'
  437. task.save()
  438. finally:
  439. # 6. Cleanup the temporary uploaded file
  440. if os.path.exists(file_path):
  441. os.remove(file_path)
  442. @login_required
  443. def title_creator_async_view(request):
  444. if request.method == 'POST' and request.FILES.get('file'):
  445. excel_file = request.FILES['file']
  446. selected_pt = request.POST.get('product_type')
  447. # 1. Save file temporarily
  448. fs = FileSystemStorage()
  449. filename = fs.save(f"temp_{uuid.uuid4().hex}_{excel_file.name}", excel_file)
  450. file_path = fs.path(filename)
  451. # 2. Create Task Record
  452. task_id = str(uuid.uuid4())
  453. ProcessingTask.objects.create(
  454. task_id=task_id,
  455. original_filename=excel_file.name,
  456. status='PENDING'
  457. )
  458. # 3. Start Background Thread
  459. thread = threading.Thread(
  460. target=process_excel_task,
  461. args=(file_path, selected_pt, task_id)
  462. )
  463. thread.start()
  464. return JsonResponse({
  465. 'status': 'started',
  466. 'task_id': task_id,
  467. 'message': 'File is processing in the background.'
  468. })
  469. return JsonResponse({'error': 'Invalid request'}, status=400)
  470. # 2. This view is called repeatedly by pollStatus() in your JS
  471. def check_status(request, task_id):
  472. # Look up the task in the database
  473. task = get_object_or_404(ProcessingTask, task_id=task_id)
  474. return JsonResponse({
  475. 'status': task.status, # 'PENDING', 'COMPLETED', or 'FAILED'
  476. 'file_name': task.original_filename,
  477. 'download_url': task.download_url # This will be null until status is COMPLETED
  478. })
  479. @login_required
  480. def title_creator_history_page(request):
  481. # Renders the HTML page
  482. return render(request, 'title_creator_history.html')
  483. @login_required
  484. def get_title_creator_tasks_json(request):
  485. # Returns the list of tasks as JSON for the history table
  486. tasks = ProcessingTask.objects.all().order_by('-created_at')[:50] # Latest 50 tasks
  487. data = []
  488. for t in tasks:
  489. data.append({
  490. 'task_id': t.task_id,
  491. 'filename': t.original_filename or "Unknown File",
  492. 'status': t.status,
  493. 'url': t.download_url,
  494. 'date': t.created_at.strftime("%d %b %Y, %I:%M %p")
  495. })
  496. return JsonResponse(data, safe=False)