瀏覽代碼

changes for the completed date in task; removed the mandatory-field hard failure and added a comment column in the title creation app

VISHAL BHANUSHALI 4 周之前
父節點
當前提交
8aec9fe8d4

+ 2 - 1
content_quality_tool/settings.py

@@ -98,7 +98,8 @@ AUTH_PASSWORD_VALIDATORS = [
 # Internationalization
 # https://docs.djangoproject.com/en/5.2/topics/i18n/
 LANGUAGE_CODE = 'en-us'
-TIME_ZONE = 'UTC'
+# TIME_ZONE = 'UTC'
+TIME_ZONE = 'Asia/Kolkata'
 USE_I18N = True
 USE_TZ = True
 # Static files (CSS, JavaScript, Images)

+ 9 - 1
title_creator_app/models.py

@@ -24,6 +24,14 @@ class ProcessingTask(models.Model):
     status = models.CharField(max_length=20, default='PENDING') 
     download_url = models.TextField(null=True, blank=True)
     created_at = models.DateTimeField(auto_now_add=True)
+    completed_at = models.DateTimeField(null=True, blank=True)
 
     def __str__(self):
-        return f"{self.original_filename} - {self.status}"    
+        return f"{self.original_filename} - {self.status}" 
+
+    @property
+    def duration(self):
+        """Calculates how long the process took."""
+        if self.completed_at and self.created_at:
+            return self.completed_at - self.created_at
+        return None   

+ 3 - 0
title_creator_app/templates/title_creator_history.html

@@ -61,6 +61,7 @@
                             <thead class="bg-gray-50/50 border-b border-gray-100">
                                 <tr>
                                     <th class="px-6 py-4 text-[11px] font-black text-gray-400 uppercase tracking-widest">Date Created</th>
+                                    <th class="px-6 py-4 text-[11px] font-black text-gray-400 uppercase tracking-widest">Completed Time</th>
                                     <th class="px-6 py-4 text-[11px] font-black text-gray-400 uppercase tracking-widest">Filename</th>
                                     <th class="px-6 py-4 text-[11px] font-black text-gray-400 uppercase tracking-widest text-center">Status</th>
                                     <th class="px-6 py-4 text-[11px] font-black text-gray-400 uppercase tracking-widest text-end">Action</th>
@@ -109,6 +110,8 @@
                     return `
                         <tr class="hover:bg-gray-50/50 transition-colors">
                             <td class="px-6 py-4 text-sm font-bold text-gray-700">${task.date}</td>
+                            <td class="px-6 py-4 text-sm font-bold text-gray-700">${task?.completed_at ? task?.completed_at : ''}</td>
+                            
                             <td class="px-6 py-4">
                                 <div class="flex items-center gap-3">
                                     <div class="p-2 bg-gray-100 rounded-lg"><i class="bi bi-file-earmark-spreadsheet text-blue-600"></i></div>

+ 2 - 0
title_creator_app/urls.py

@@ -14,4 +14,6 @@ urlpatterns = [
     path('api/save-config/', views.save_config_api, name='save_config_api'),
     path('title-creator/history/', views.title_creator_history_page, name='title_creator_history'),
     path('title-creator/api/tasks/', views.get_title_creator_tasks_json, name='title_creator_tasks_json'),
+    path('get-token-scrapper/', views.TokenFetcherAPI.as_view(), name='get_token_scrapper'),
 ]+ static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
+

+ 139 - 51
title_creator_app/views.py

@@ -18,6 +18,11 @@ from django.contrib.auth import authenticate, login, logout
 from .decorators import login_required
 from django.contrib.auth.hashers import make_password
 import random
+from rest_framework import status
+from rest_framework.views import APIView
+from django.utils import timezone
+import logging
+logger = logging.getLogger(__name__)
 
 # To login
 def login_view(request):
@@ -194,18 +199,28 @@ def save_config_api(request):
 def extract_title_or_error(product, selected_pt):
     # 1. Identify Product Type
     pt_name = selected_pt
+    logger.info(f"IN extract_title_or_error")
     try:
         mapping = TitleMapping.objects.get(product_type=pt_name)
         config_sequence = mapping.get_sequence_list()
     except TitleMapping.DoesNotExist:
-        return f"No Title Configuration found for Product Type: {pt_name}"
+        return None,f"No Title Configuration found for Product Type: {pt_name}"
 
     mandatory_fields = list(AttributeMaster.objects.filter(is_mandatory=True).values_list('name', flat=True))
+    # Loop through each group (e.g., Weights & Dimensions, Product Details)
+    product_type = None
+    # for product type
+    for group in product.get("attributeGroups", []):
+        # Loop through each attribute in that group
+        for attr in group.get("attributes", []):
+            if attr.get("attributeDesc") == "Type":
+                product_type = attr.get("attributeValue")
+                break  # Stop searching once found
 
     # 2. Data Extraction
     extracted_data = {
-        "Brand": product.get("brand")+"©",
-        "Product Type": pt_name
+        "Brand": product.get("brand"),
+        "Product Type": product_type
     }
     dimensions = {}
 
@@ -215,7 +230,7 @@ def extract_title_or_error(product, selected_pt):
             val = attr.get("attributeValue")
             
             if desc == "Capacity":
-                extracted_data[desc] = f"Capacity {val}"
+                extracted_data[desc] = f"{val} Capacity"
             elif desc in ["Door Type", "Color"]:
                 extracted_data[desc] = val
             elif desc in ["Width", "Depth", "Height"]:
@@ -240,9 +255,10 @@ def extract_title_or_error(product, selected_pt):
                 missing_mandatory.append(attr_name)
             continue 
         final_title_parts.append(str(val))
-
+    comment =  None
     if missing_mandatory:
-        return f"Could not found {', '.join(missing_mandatory)} on Product Details page"
+        comment = f"Could not found {', '.join(missing_mandatory)} on Product Details page"
+        # return f"Could not found {', '.join(missing_mandatory)} on Product Details page"
 
     # Helper function to join parts: Brand PT, Param1, Param2
     def construct_string(parts):
@@ -253,7 +269,8 @@ def extract_title_or_error(product, selected_pt):
     current_title = construct_string(final_title_parts)
 
     # 4. Length Reduction Logic (Step-by-Step)
-    print("Current Title 1 ########",current_title,len(current_title))
+    print("Current Title 1 ######## ",current_title,len(current_title))
+    logger.info(f"Current Title 1 Initial ########,{current_title},{len(current_title)}")
     # Step 1: Change "Capacity" -> "Cap."
     if len(current_title) > 100:
         for i, part in enumerate(final_title_parts):
@@ -261,18 +278,20 @@ def extract_title_or_error(product, selected_pt):
                 final_title_parts[i] = part.replace("Capacity", "Cap.")
         current_title = construct_string(final_title_parts)
 
-    print("Current Title 2 ########",current_title,len(current_title))    
+    print("Current Title 2 ########",current_title,len(current_title))
+    logger.info(f"Current Title 2 shorting capacity ########,{current_title},{len(current_title)}")    
 
     # Step 2: Shorten Product Type (e.g., Stainless Steel -> SS)
     # Step B: Dynamic Product Type Acronym
-    if len(current_title) > 100:
-        pt_part = final_title_parts[1]
-        words = pt_part.split()
-        if len(words) > 1:
-            # Takes first letter of every word in the Product Type
-            final_title_parts[1] = "".join([w[0].upper() for w in words])
-            current_title = construct_string(final_title_parts)
-    print("Current Title 3 ########",current_title,len(current_title))
+    # if len(current_title) > 100:
+    #     pt_part = final_title_parts[1]
+    #     words = pt_part.split()
+    #     if len(words) > 1:
+    #         # Takes first letter of every word in the Product Type
+    #         final_title_parts[1] = "".join([w[0].upper() for w in words])
+    #         current_title = construct_string(final_title_parts)
+    # print("Current Title 3 ########",current_title,len(current_title))
+    # logger.info(f"Current Title 3 change the title ########,{current_title},{len(current_title)}")
     # Step 3: Remove spaces from attributes starting from the back
     # Brand (0) and Product Type (1) are skipped
     if len(current_title) > 100:
@@ -283,20 +302,25 @@ def extract_title_or_error(product, selected_pt):
             final_title_parts[i] = final_title_parts[i].replace(" ", "")
             current_title = construct_string(final_title_parts)
     print("Current Title 4 ########",current_title,len(current_title))
-    return current_title
+    logger.info(f"Current Title 4 Removing space ########,{current_title},{len(current_title)}")
+    return current_title,comment
 
 def construct_dynamic_title(raw_data,selected_pt):
     try:
         product = raw_data.get("props", {}).get("pageProps", {}).get("product", {})
         if not product: return "Product data not found"
-        
-        return extract_title_or_error(product,selected_pt).strip()
+        logger.info(f"IN construct_dynamic_title")
+        return extract_title_or_error(product,selected_pt)
     except Exception:
-        return "Could not found attribute name on product details page"
+        return None,"Could not found attribute name on product details page"
 
 @login_required
 def title_creator_view(request):
     if request.method == 'POST' and request.FILES.get('file'):
+        fresh_token = get_fresh_token()
+        logger.info(f"fresh_token Value: {fresh_token}")
+        if not fresh_token:
+            fresh_token = "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA"
         excel_file = request.FILES['file']
         selected_pt = request.POST.get('product_type')
         fs = FileSystemStorage()
@@ -310,13 +334,15 @@ def title_creator_view(request):
             # 2. Add the NEW COLUMN if it doesn't exist
             if 'New_Generated_Title' not in df.columns:
                 df['New_Generated_Title'] = ""
+            if 'Comment' not in df.columns:
+                df['Comment'] = ""    
 
             headers = {"User-Agent": "Mozilla/5.0"}
             results_for_ui = []
             # Specific Headers for the Item# API
             api_headers = {
                 "accept": "application/json, text/plain, */*",
-                "authorization": "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA",
+                "authorization": fresh_token,#"Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA",
                 "client_id": "GEC",
                 "referer": "https://www.globalindustrial.com/"
             }
@@ -327,6 +353,7 @@ def title_creator_view(request):
                 item_number = row.get('Item#')
                 new_title = ""
                 final_url = None
+                comment = ""
 
                 # Step 1: Resolve the URL
                 if pd.notna(url) and str(url).startswith('http'):
@@ -351,21 +378,23 @@ def title_creator_view(request):
                         
                         if script_tag:
                             raw_data = json.loads(script_tag.string)
-                            new_title = construct_dynamic_title(raw_data,selected_pt)
+                            new_title,comment = construct_dynamic_title(raw_data,selected_pt)
                         else:
-                            new_title = "Could not found attribute name on product details page"
+                            new_title,comment = "Could not found attribute name on product details page",None
                     except:
-                        new_title = "Could not found attribute name on product details page"
+                        new_title,comment = "Could not found attribute name on product details page",None
                 else:
-                    new_title = "URL Missing"
+                    new_title,comment = "URL Missing",None
 
                 # Update the DataFrame column for this row
                 df.at[index, 'New_Generated_Title'] = new_title
+                df.at[index, 'Comment'] = comment
                 
                 results_for_ui.append({
                     "id" : index + 1,
                     "url": final_url,
                     "new_title": new_title,
+                    "comment": comment,
                     "status": True
                 })
                 # Generates a random float between 3.0 and 7.0
@@ -391,38 +420,40 @@ def title_creator_view(request):
     return render(request, 'title_creator_index.html', {'product_types': product_types})
     # return render(request, 'title_creator_index.html')
 
-async def get_fresh_token(scraper):
-    """Hits the homepage once to extract the latest Bearer token."""
+def get_fresh_token():
+    """Hits the homepage to extract the latest Bearer token."""
     base_url = "https://www.globalindustrial.com"
     try:
-        # One-time hit to the base URL
-        response = scraper.get(base_url, timeout=15)
+        # Use a session to persist cookies
+        session = requests.Session()
+        response = session.get(base_url, timeout=15, headers={"User-Agent": "Mozilla/5.0"})
         
-        # 1. Check Cookies for 'Authorization'
-        token = scraper.cookies.get('Authorization')
+        # 1. Try Cookies
+        token = session.cookies.get('Authorization')
         if token:
-            return token.replace('Bearer ', '').strip()
+            return token if "Bearer" in token else f"Bearer {token}"
 
-        # 2. Check __NEXT_DATA__ script in HTML
+        # 2. Try NEXT_DATA
         soup = BeautifulSoup(response.content, 'html.parser')
         script_tag = soup.find('script', id='__NEXT_DATA__')
         if script_tag:
             data = json.loads(script_tag.string)
-            # Standard Next.js path for auth tokens
             token = data.get('props', {}).get('pageProps', {}).get('token')
             if token:
-                return token
-                
+                return f"Bearer {token}"
     except Exception as e:
         print(f"Token retrieval failed: {e}")
-    
-    # Fallback to your hardcoded token if extraction fails
-    return "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA"
+    return None
 
-async def process_excel_task(file_path, selected_pt, task_id):
+def process_excel_task(file_path, selected_pt, task_id):
+    print("process excel task started.")
     # Retrieve the task record from the database
-    scraper = cloudscraper.create_scraper() # This replaces requests.get
+    # scraper = cloudscraper.create_scraper() # This replaces requests.get
     task = ProcessingTask.objects.get(task_id=task_id)
+    fresh_token = get_fresh_token()
+    logger.info(f"fresh_token Value: {fresh_token}")
+    if not fresh_token:
+            fresh_token = "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA"
     
     try:
         # 1. Read Excel
@@ -431,21 +462,25 @@ async def process_excel_task(file_path, selected_pt, task_id):
         # 2. Add the NEW COLUMN if it doesn't exist
         if 'New_Generated_Title' not in df.columns:
             df['New_Generated_Title'] = ""
+        if 'Comment' not in df.columns:
+            df['Comment'] = ""        
 
         headers = {"User-Agent": "Mozilla/5.0"}
         # dynamic_token = await get_fresh_token(scraper)
         # Specific Headers for the Item# API
         api_headers = {
             "accept": "application/json, text/plain, */*",
-            "authorization": "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA", #f"Bearer {dynamic_token}",
+            "authorization": fresh_token, #"Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA", #f"Bearer {dynamic_token}",
             "client_id": "GEC",
             "referer": "https://www.globalindustrial.com/"
         }
 
         # 3. Process each row
         for index, row in df.iterrows():
+            logger.info(f"STARTED: {index}")
             url = row.get('URL')
             new_title = ""
+            comment = ""
             item_number = row.get('Item#')
             final_url = None
             # Step 1: Resolve the URL
@@ -460,7 +495,7 @@ async def process_excel_task(file_path, selected_pt, task_id):
                         data = api_resp.json()
                         final_url = data.get('exactMatch', {}).get('canonicalLink')
                 except Exception as e:
-                    new_title = f"API Error for Item# {item_number}"
+                    new_title,comment = f"API Error for Item# {item_number}"
 
             if pd.notna(final_url):
                 try:
@@ -475,24 +510,26 @@ async def process_excel_task(file_path, selected_pt, task_id):
                             try:
                                 raw_data = json.loads(script_tag.string)
                                 # Calling your dynamic title helper
-                                new_title = construct_dynamic_title(raw_data, selected_pt)
+                                new_title,comment = construct_dynamic_title(raw_data, selected_pt)
                             except Exception:
-                                new_title = "Data Parsing Error"
+                                new_title,comment = "Data Parsing Error",None
                         else:
-                            new_title = "Could not found attribute name on product details page"
+                            new_title,comment = "Could not found attribute name on product details page",None
                     else:
-                        new_title = f"HTTP Error: {resp.status_code}"
+                        new_title,comment = f"HTTP Error: {resp.status_code}",None
                 except Exception:
-                    new_title = "Request Failed (Timeout/Connection)"
+                    new_title,comment = "Request Failed (Timeout/Connection)",None
             else:
-                new_title = "URL Missing"
+                new_title,comment = "URL Missing",None
 
             # Update the DataFrame
             df.at[index, 'New_Generated_Title'] = new_title
+            df.at[index, 'Comment'] = comment
             
             # Optional: Sleep to prevent getting blocked by the server
             # Generates a random float between 3.0 and 7.0
             time.sleep(random.uniform(3, 7))
+            logger.info(f"ENDED: {index}")
             # time.sleep(1)
 
         # 4. Save the modified Excel to the MEDIA folder
@@ -509,7 +546,9 @@ async def process_excel_task(file_path, selected_pt, task_id):
         task.status = 'COMPLETED'
         # Construct the URL for the frontend to download
         task.download_url = f"{settings.MEDIA_URL}{output_filename}"
+        task.completed_at = timezone.now() # Sets the completion time to NOW (IST)
         task.save()
+        print("process excel task ended.")
 
     except Exception as e:
         print(f"Critical Task Failure: {e}")
@@ -524,6 +563,7 @@ async def process_excel_task(file_path, selected_pt, task_id):
 @login_required
 def title_creator_async_view(request):
     if request.method == 'POST' and request.FILES.get('file'):
+        
         excel_file = request.FILES['file']
         selected_pt = request.POST.get('product_type')
         
@@ -582,6 +622,54 @@ def get_title_creator_tasks_json(request):
             'filename': t.original_filename or "Unknown File",
             'status': t.status,
             'url': t.download_url,
-            'date': t.created_at.strftime("%d %b %Y, %I:%M %p")
+            'date': t.created_at.strftime("%d %b %Y, %I:%M %p"),
+            # Use a conditional (ternary) operator to handle the null case
+            'completed_at': t.completed_at.strftime("%d %b %Y, %I:%M %p") if t.completed_at else ""
+            # 'completed_at': t.completed_at.strftime("%d %b %Y, %I:%M %p")
         })
-    return JsonResponse(data, safe=False)
+    return JsonResponse(data, safe=False)
+
+
+class TokenFetcherAPI(APIView):
+    def get(self, request):
+        token = fetch_global_industrial_token()
+        
+        if token:
+            return JsonResponse({
+                "status": "success",
+                "token": token
+            }, status=status.HTTP_200_OK)
+        
+        return JsonResponse({
+            "status": "error",
+            "message": "Could not retrieve token"
+        }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
+    
+
+
+def fetch_global_industrial_token():
+    """Logic to scrape the token."""
+    base_url = "https://www.globalindustrial.com"
+    # Using cloudscraper to handle potential bot detection
+    scraper = cloudscraper.create_scraper() 
+    
+    try:
+        response = scraper.get(base_url, timeout=15)
+        
+        # 1. Check Cookies
+        token = scraper.cookies.get('Authorization')
+        if token:
+            return token.replace('Bearer ', '').strip()
+
+        # 2. Check __NEXT_DATA__
+        soup = BeautifulSoup(response.content, 'html.parser')
+        script_tag = soup.find('script', id='__NEXT_DATA__')
+        if script_tag:
+            data = json.loads(script_tag.string)
+            token = data.get('props', {}).get('pageProps', {}).get('token')
+            if token:
+                return token
+                
+    except Exception as e:
+        return None
+    return None