
Changes for the file upload in Title Creator

VISHAL BHANUSHALI 4 weeks ago
parent
commit
abf1085937

binary
media/examples/title_creator/example_item_no.xlsx


binary
media/examples/title_creator/example_url.xlsx


+ 53 - 0
title_creator_app/spiders.py

@@ -0,0 +1,53 @@
+# spiders.py
+import json
+import scrapy
+
+class TitleCreatorSpider(scrapy.Spider):
+    name = "title_creator"
+
+    def __init__(self, urls=None, selected_pt=None, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.start_urls = urls or []
+        self.selected_pt = selected_pt
+        self.results = []
+
+    def parse(self, response):
+        # We import here to avoid 'AppRegistryNotReady' errors in Django
+        from .views import construct_dynamic_title 
+        new_title = ""
+        status_d = False
+    
+        if response.status != 200:
+            new_title = f"Failed (HTTP {response.status})"
+            status_d = False
+        else:
+            self.logger.debug("Parsing response from %s", response.url)
+            script_tag = response.css('script#__NEXT_DATA__::text').get()
+            if script_tag:
+                try:
+                    raw_data = json.loads(script_tag)
+                    new_title = construct_dynamic_title(raw_data, self.selected_pt)
+                    status_d = True
+                except Exception:
+                    new_title = "Data Parsing Error"
+                    status_d = False
+            else:
+                new_title = "Attribute not found (Empty Script)"
+                status_d = False
+
+        self.results.append({
+            "id": self.selected_pt,
+            "url": response.url,
+            "new_title": new_title,
+            "status": status_d
+        })
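The spider stores scraped rows on self.results instead of yielding items, so the caller needs a handle on the spider instance once the crawl finishes. A minimal sketch of how it could be driven from Django code, assuming an in-process Scrapy run via CrawlerProcess (this wrapper is not part of the commit; run_title_spider is a hypothetical name):

    # Hypothetical driver for TitleCreatorSpider (not part of this commit).
    from scrapy.crawler import CrawlerProcess
    from title_creator_app.spiders import TitleCreatorSpider

    def run_title_spider(urls, selected_pt):
        process = CrawlerProcess(settings={"LOG_ENABLED": False})
        crawler = process.create_crawler(TitleCreatorSpider)
        process.crawl(crawler, urls=urls, selected_pt=selected_pt)
        process.start()  # blocks until the crawl finishes
        return crawler.spider.results  # rows appended by parse()

Because the Twisted reactor can only be started once per process, this pattern suits a one-off management command better than a long-lived web worker.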

+ 21 - 1
title_creator_app/templates/title_creator_index.html

@@ -158,7 +158,27 @@
         <div class="mb-10 flex flex-col md:flex-row md:items-end md:justify-between gap-4 border-b border-gray-100 pb-6">
             <div class="text-left">
                 <h1 class="text-4xl font-black text-gray-900 tracking-tight mb-2">
-                    Title Creator
+                    Title Creator
+                    <a href="./../media/examples/title_creator/example_url.xlsx"
+                       class="inline-flex items-center px-4 py-2 bg-white border border-gray-300 rounded-lg text-sm font-medium text-gray-700 hover:bg-gray-50 hover:text-blue-600 transition-all shadow-sm gap-2"
+                       title="Download URL Template" download>
+                        <i class="bi bi-download"></i>
+                        URL Template
+                    </a>
+                    <a href="./../media/examples/title_creator/example_item_no.xlsx"
+                       class="inline-flex items-center px-4 py-2 bg-white border border-gray-300 rounded-lg text-sm font-medium text-gray-700 hover:bg-gray-50 hover:text-blue-600 transition-all shadow-sm gap-2"
+                       title="Download Item Number Template" download>
+                        <i class="bi bi-download"></i>
+                        Item No. Template
+                    </a>
                 </h1>
                 <p class="text-gray-500 font-medium ">
                     Upload Excel URLs to generate dynamic titles from Data.
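The two download links above point at the binary example workbooks added in this commit. A sketch of how such templates could be regenerated with pandas, on the assumption that their column headers match the 'URL' and 'Item#' names that views.py reads with row.get(); the sample values are placeholders, not taken from the real files:

    # Regenerate the example templates (column names assumed from views.py; requires openpyxl).
    import pandas as pd

    pd.DataFrame({"URL": ["https://www.globalindustrial.com/<product-page>"]}).to_excel(
        "media/examples/title_creator/example_url.xlsx", index=False)

    pd.DataFrame({"Item#": ["<item-number>"]}).to_excel(
        "media/examples/title_creator/example_item_no.xlsx", index=False)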

+ 65 - 13
title_creator_app/views.py

@@ -17,7 +17,7 @@ from django.contrib.auth import authenticate, login, logout
 # from django.contrib.auth.decorators import login_required
 from .decorators import login_required
 from django.contrib.auth.hashers import make_password
-
+import random
 
 # To login
 def login_view(request):
@@ -227,7 +227,7 @@ def extract_title_or_error(product, selected_pt):
         w = dimensions["Width"].replace(" in", "").strip()
         d = dimensions["Depth"].replace(" in", "").strip()
         h = dimensions["Height"].replace(" in", "").strip()
-        extracted_data["Dimensions"] = f'{w}"w x {d}"d x {h}"h'
+        extracted_data["Dimensions"] = f'{w}"W x {d}"D x {h}"H'
 
     # 3. Build Title Parts
     final_title_parts = []
@@ -253,7 +253,7 @@ def extract_title_or_error(product, selected_pt):
     current_title = construct_string(final_title_parts)
 
     # 4. Length Reduction Logic (Step-by-Step)
-    
+    print("Current Title 1:", current_title, len(current_title))
     # Step 1: Change "Capacity" -> "Cap."
     if len(current_title) > 100:
         for i, part in enumerate(final_title_parts):
@@ -261,6 +261,8 @@ def extract_title_or_error(product, selected_pt):
                 final_title_parts[i] = part.replace("Capacity", "Cap.")
         current_title = construct_string(final_title_parts)
 
+    print("Current Title 2:", current_title, len(current_title))
+
     # Step 2: Shorten Product Type (e.g., Stainless Steel -> SS)
     # Step B: Dynamic Product Type Acronym
     if len(current_title) > 100:
@@ -270,7 +272,7 @@ def extract_title_or_error(product, selected_pt):
             # Takes first letter of every word in the Product Type
             final_title_parts[1] = "".join([w[0].upper() for w in words])
             current_title = construct_string(final_title_parts)
-
+    print("Current Title 3:", current_title, len(current_title))
     # Step 3: Remove spaces from attributes starting from the back
     # Brand (0) and Product Type (1) are skipped
     if len(current_title) > 100:
@@ -280,7 +282,7 @@ def extract_title_or_error(product, selected_pt):
             # Remove white spaces from the current attribute part
             final_title_parts[i] = final_title_parts[i].replace(" ", "")
             current_title = construct_string(final_title_parts)
-
+    print("Current Title 4:", current_title, len(current_title))
     return current_title
 
 def construct_dynamic_title(raw_data,selected_pt):
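For reference, the Step 2 shortening above collapses the Product Type to its initials once the title exceeds 100 characters. A tiny illustration, using a made-up product type value and assuming words is the whitespace-split Product Type:

    product_type = "Stainless Steel Workbench"  # hypothetical value
    acronym = "".join([w[0].upper() for w in product_type.split()])
    print(acronym)  # -> "SSW"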
@@ -311,15 +313,39 @@ def title_creator_view(request):
 
             headers = {"User-Agent": "Mozilla/5.0"}
             results_for_ui = []
+            # Specific Headers for the Item# API
+            api_headers = {
+                "accept": "application/json, text/plain, */*",
+                "authorization": "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA",
+                "client_id": "GEC",
+                "referer": "https://www.globalindustrial.com/"
+            }
 
             # 3. Process each row
             for index, row in df.iterrows():
                 url = row.get('URL') # Assumes your excel has a 'URL' column
+                item_number = row.get('Item#')
                 new_title = ""
+                final_url = None
+
+                # Step 1: Resolve the URL
+                if pd.notna(url) and str(url).startswith('http'):
+                    final_url = url
+                elif pd.notna(item_number):
+                    # Call API to get URL from Item#
+                    api_url = f"https://www.globalindustrial.com/catalogApis/catalog/autosuggest?key={item_number}&features=true"
+                    try:
+                        api_resp = requests.get(api_url, headers=api_headers, timeout=10)
+                        if api_resp.status_code == 200:
+                            data = api_resp.json()
+                            final_url = (data.get('exactMatch') or {}).get('canonicalLink')
+                    except Exception as e:
+                        new_title = f"API Error for Item# {item_number}: {e}"
 
-                if pd.notna(url):
+                if pd.notna(final_url):
                     try:
-                        resp = requests.get(url, headers=headers, timeout=10)
+                        resp = requests.get(final_url, headers=headers, timeout=10)
                         soup = BeautifulSoup(resp.content, 'html.parser')
                         script_tag = soup.find('script', id='__NEXT_DATA__')
                         
@@ -338,11 +364,13 @@ def title_creator_view(request):
                 
                 results_for_ui.append({
                     "id" : index + 1,
-                    "url": url,
+                    "url": final_url,
                     "new_title": new_title,
                     "status": True
                 })
-                time.sleep(1) # Safety delay
+                # Random delay of 3.0-7.0 seconds to reduce the chance of being blocked
+                time.sleep(random.uniform(3, 7))
 
             # 4. Save the modified Excel to a new path
             output_filename = f"processed_{excel_file.name}"
@@ -377,17 +405,39 @@ def process_excel_task(file_path, selected_pt, task_id):
             df['New_Generated_Title'] = ""
 
         headers = {"User-Agent": "Mozilla/5.0"}
+        # Specific Headers for the Item# API
+        api_headers = {
+            "accept": "application/json, text/plain, */*",
+            "authorization": "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA",
+            "client_id": "GEC",
+            "referer": "https://www.globalindustrial.com/"
+        }
 
         # 3. Process each row
         for index, row in df.iterrows():
             url = row.get('URL')
             new_title = ""
-
-            if pd.notna(url):
+            item_number = row.get('Item#')
+            final_url = None
+            # Step 1: Resolve the URL
+            if pd.notna(url) and str(url).startswith('http'):
+                final_url = url
+            elif pd.notna(item_number):
+                # Call API to get URL from Item#
+                api_url = f"https://www.globalindustrial.com/catalogApis/catalog/autosuggest?key={item_number}&features=true"
+                try:
+                    api_resp = requests.get(api_url, headers=api_headers, timeout=10)
+                    if api_resp.status_code == 200:
+                        data = api_resp.json()
+                        final_url = (data.get('exactMatch') or {}).get('canonicalLink')
+                except Exception as e:
+                    new_title = f"API Error for Item# {item_number}: {e}"
+
+            if pd.notna(final_url):
                 try:
                     # Scraping logic
                     # resp = scraper.get(url, timeout=15)
-                    resp = requests.get(url, headers=headers, timeout=10)
+                    resp = requests.get(final_url, headers=headers, timeout=10)
                     if resp.status_code == 200:
                         soup = BeautifulSoup(resp.content, 'html.parser')
                         script_tag = soup.find('script', id='__NEXT_DATA__')
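The Item# resolution added above is duplicated verbatim in title_creator_view and process_excel_task (including the api_headers dict). A possible refactor, not part of this commit, would pull it into one helper; resolve_item_url is a hypothetical name:

    # Hypothetical shared helper for the Item# -> URL lookup used in both views.
    import requests

    def resolve_item_url(item_number, api_headers, timeout=10):
        """Return the canonical product URL for an Item#, or None on any failure."""
        api_url = (
            "https://www.globalindustrial.com/catalogApis/catalog/autosuggest"
            f"?key={item_number}&features=true"
        )
        try:
            resp = requests.get(api_url, headers=api_headers, timeout=timeout)
            if resp.status_code == 200:
                return (resp.json().get('exactMatch') or {}).get('canonicalLink')
        except requests.RequestException:
            pass
        return None

Each call site would then reduce its branch to final_url = resolve_item_url(item_number, api_headers) when the URL column is empty.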
@@ -412,7 +462,9 @@ def process_excel_task(file_path, selected_pt, task_id):
             df.at[index, 'New_Generated_Title'] = new_title
             
             # Optional: Sleep to prevent getting blocked by the server
-            time.sleep(1)
+            # Random delay of 3.0-7.0 seconds
+            time.sleep(random.uniform(3, 7))
 
         # 4. Save the modified Excel to the MEDIA folder
         output_filename = f"completed_{task_id}_{task.original_filename}"