|
|
@@ -17,7 +17,7 @@ from django.contrib.auth import authenticate, login, logout
|
|
|
# from django.contrib.auth.decorators import login_required
|
|
|
from .decorators import login_required
|
|
|
from django.contrib.auth.hashers import make_password
|
|
|
-
|
|
|
+import random
|
|
|
|
|
|
# To login
|
|
|
def login_view(request):
|
|
|
@@ -227,7 +227,7 @@ def extract_title_or_error(product, selected_pt):
|
|
|
w = dimensions["Width"].replace(" in", "").strip()
|
|
|
d = dimensions["Depth"].replace(" in", "").strip()
|
|
|
h = dimensions["Height"].replace(" in", "").strip()
|
|
|
- extracted_data["Dimensions"] = f'{w}"w x {d}"d x {h}"h'
|
|
|
+ extracted_data["Dimensions"] = f'{w}"W x {d}"D x {h}"H'
|
|
|
|
|
|
# 3. Build Title Parts
|
|
|
final_title_parts = []
|
|
|
@@ -253,7 +253,7 @@ def extract_title_or_error(product, selected_pt):
|
|
|
current_title = construct_string(final_title_parts)
|
|
|
|
|
|
# 4. Length Reduction Logic (Step-by-Step)
|
|
|
-
|
|
|
+ print("Current Title 1 ########",current_title,len(current_title))
|
|
|
# Step 1: Change "Capacity" -> "Cap."
|
|
|
if len(current_title) > 100:
|
|
|
for i, part in enumerate(final_title_parts):
|
|
|
@@ -261,6 +261,8 @@ def extract_title_or_error(product, selected_pt):
|
|
|
final_title_parts[i] = part.replace("Capacity", "Cap.")
|
|
|
current_title = construct_string(final_title_parts)
|
|
|
|
|
|
+ print("Current Title 2 ########",current_title,len(current_title))
|
|
|
+
|
|
|
# Step 2: Shorten Product Type (e.g., Stainless Steel -> SS)
|
|
|
# Step B: Dynamic Product Type Acronym
|
|
|
if len(current_title) > 100:
|
|
|
@@ -270,7 +272,7 @@ def extract_title_or_error(product, selected_pt):
|
|
|
# Takes first letter of every word in the Product Type
|
|
|
final_title_parts[1] = "".join([w[0].upper() for w in words])
|
|
|
current_title = construct_string(final_title_parts)
|
|
|
-
|
|
|
+ print("Current Title 3 ########",current_title,len(current_title))
|
|
|
# Step 3: Remove spaces from attributes starting from the back
|
|
|
# Brand (0) and Product Type (1) are skipped
|
|
|
if len(current_title) > 100:
|
|
|
@@ -280,7 +282,7 @@ def extract_title_or_error(product, selected_pt):
|
|
|
# Remove white spaces from the current attribute part
|
|
|
final_title_parts[i] = final_title_parts[i].replace(" ", "")
|
|
|
current_title = construct_string(final_title_parts)
|
|
|
-
|
|
|
+ print("Current Title 4 ########",current_title,len(current_title))
|
|
|
return current_title
|
|
|
|
|
|
def construct_dynamic_title(raw_data,selected_pt):
|
|
|
@@ -311,15 +313,39 @@ def title_creator_view(request):
|
|
|
|
|
|
headers = {"User-Agent": "Mozilla/5.0"}
|
|
|
results_for_ui = []
|
|
|
+ # Specific Headers for the Item# API
|
|
|
+ api_headers = {
|
|
|
+ "accept": "application/json, text/plain, */*",
|
|
|
+ "authorization": "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA",
|
|
|
+ "client_id": "GEC",
|
|
|
+ "referer": "https://www.globalindustrial.com/"
|
|
|
+ }
|
|
|
|
|
|
# 3. Process each row
|
|
|
for index, row in df.iterrows():
|
|
|
url = row.get('URL') # Assumes your excel has a 'URL' column
|
|
|
+ item_number = row.get('Item#')
|
|
|
new_title = ""
|
|
|
+ final_url = None
|
|
|
+
|
|
|
+ # Step 1: Resolve the URL
|
|
|
+ if pd.notna(url) and str(url).startswith('http'):
|
|
|
+ final_url = url
|
|
|
+ elif pd.notna(item_number):
|
|
|
+ # Call API to get URL from Item#
|
|
|
+ api_url = f"https://www.globalindustrial.com/catalogApis/catalog/autosuggest?key={item_number}&features=true"
|
|
|
+ try:
|
|
|
+ api_resp = requests.get(api_url, headers=api_headers, timeout=10)
|
|
|
+ if api_resp.status_code == 200:
|
|
|
+ data = api_resp.json()
|
|
|
+ final_url = data.get('exactMatch', {}).get('canonicalLink')
|
|
|
+ except Exception as e:
|
|
|
+ new_title = f"API Error for Item# {item_number}"
|
|
|
+
|
|
|
|
|
|
- if pd.notna(url):
|
|
|
+ if pd.notna(final_url):
|
|
|
try:
|
|
|
- resp = requests.get(url, headers=headers, timeout=10)
|
|
|
+ resp = requests.get(final_url, headers=headers, timeout=10)
|
|
|
soup = BeautifulSoup(resp.content, 'html.parser')
|
|
|
script_tag = soup.find('script', id='__NEXT_DATA__')
|
|
|
|
|
|
@@ -338,11 +364,13 @@ def title_creator_view(request):
|
|
|
|
|
|
results_for_ui.append({
|
|
|
"id" : index + 1,
|
|
|
- "url": url,
|
|
|
+ "url": final_url,
|
|
|
"new_title": new_title,
|
|
|
"status": True
|
|
|
})
|
|
|
- time.sleep(1) # Safety delay
|
|
|
+ # Sleep a random 3.0–7.0 s between requests to mimic human pacing and avoid rate limiting
|
|
|
+ time.sleep(random.uniform(3, 7))
|
|
|
+ # time.sleep(1) # Safety delay
|
|
|
|
|
|
# 4. Save the modified Excel to a new path
|
|
|
output_filename = f"processed_{excel_file.name}"
|
|
|
@@ -377,17 +405,39 @@ def process_excel_task(file_path, selected_pt, task_id):
|
|
|
df['New_Generated_Title'] = ""
|
|
|
|
|
|
headers = {"User-Agent": "Mozilla/5.0"}
|
|
|
+ # Specific Headers for the Item# API
|
|
|
+ api_headers = {
|
|
|
+ "accept": "application/json, text/plain, */*",
|
|
|
+ "authorization": "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA",
|
|
|
+ "client_id": "GEC",
|
|
|
+ "referer": "https://www.globalindustrial.com/"
|
|
|
+ }
|
|
|
|
|
|
# 3. Process each row
|
|
|
for index, row in df.iterrows():
|
|
|
url = row.get('URL')
|
|
|
new_title = ""
|
|
|
-
|
|
|
- if pd.notna(url):
|
|
|
+ item_number = row.get('Item#')
|
|
|
+ final_url = None
|
|
|
+ # Step 1: Resolve the URL
|
|
|
+ if pd.notna(url) and str(url).startswith('http'):
|
|
|
+ final_url = url
|
|
|
+ elif pd.notna(item_number):
|
|
|
+ # Call API to get URL from Item#
|
|
|
+ api_url = f"https://www.globalindustrial.com/catalogApis/catalog/autosuggest?key={item_number}&features=true"
|
|
|
+ try:
|
|
|
+ api_resp = requests.get(api_url, headers=api_headers, timeout=10)
|
|
|
+ if api_resp.status_code == 200:
|
|
|
+ data = api_resp.json()
|
|
|
+ final_url = data.get('exactMatch', {}).get('canonicalLink')
|
|
|
+ except Exception as e:
|
|
|
+ new_title = f"API Error for Item# {item_number}"
|
|
|
+
|
|
|
+ if pd.notna(final_url):
|
|
|
try:
|
|
|
# Scraping logic
|
|
|
# resp = scraper.get(url, timeout=15)
|
|
|
- resp = requests.get(url, headers=headers, timeout=10)
|
|
|
+ resp = requests.get(final_url, headers=headers, timeout=10)
|
|
|
if resp.status_code == 200:
|
|
|
soup = BeautifulSoup(resp.content, 'html.parser')
|
|
|
script_tag = soup.find('script', id='__NEXT_DATA__')
|
|
|
@@ -412,7 +462,9 @@ def process_excel_task(file_path, selected_pt, task_id):
|
|
|
df.at[index, 'New_Generated_Title'] = new_title
|
|
|
|
|
|
# Optional: Sleep to prevent getting blocked by the server
|
|
|
- time.sleep(1)
|
|
|
+ # Sleep a random 3.0–7.0 s between requests to mimic human pacing and avoid rate limiting
|
|
|
+ time.sleep(random.uniform(3, 7))
|
|
|
+ # time.sleep(1)
|
|
|
|
|
|
# 4. Save the modified Excel to the MEDIA folder
|
|
|
output_filename = f"completed_{task_id}_{task.original_filename}"
|