@@ -18,6 +18,11 @@ from django.contrib.auth import authenticate, login, logout
 from .decorators import login_required
 from django.contrib.auth.hashers import make_password
 import random
+from rest_framework import status
+from rest_framework.views import APIView
+from django.utils import timezone
+import logging
+logger = logging.getLogger(__name__)
 
 # To login
 def login_view(request):
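
Reviewer note: the new module-level logger only surfaces output if the project routes it somewhere. A minimal `LOGGING` sketch for settings.py, assuming the project does not already configure logging (it may well do so):

# settings.py (sketch) -- only needed if the project has no LOGGING config yet.
LOGGING = {
    "version": 1,
    "disable_existing_loggers": False,
    "handlers": {"console": {"class": "logging.StreamHandler"}},
    "root": {"handlers": ["console"], "level": "INFO"},
}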
@@ -194,18 +199,28 @@ def save_config_api(request):
 def extract_title_or_error(product, selected_pt):
     # 1. Identify Product Type
     pt_name = selected_pt
+    logger.info("IN extract_title_or_error")
     try:
         mapping = TitleMapping.objects.get(product_type=pt_name)
         config_sequence = mapping.get_sequence_list()
     except TitleMapping.DoesNotExist:
-        return f"No Title Configuration found for Product Type: {pt_name}"
+        return None, f"No Title Configuration found for Product Type: {pt_name}"
 
     mandatory_fields = list(AttributeMaster.objects.filter(is_mandatory=True).values_list('name', flat=True))
+    # Loop through each group (e.g., Weights & Dimensions, Product Details)
+    product_type = None
+    # Find the product type
+    for group in product.get("attributeGroups", []):
+        # Loop through each attribute in that group
+        for attr in group.get("attributes", []):
+            if attr.get("attributeDesc") == "Type":
+                product_type = attr.get("attributeValue")
+                break  # Stop searching once found
 
     # 2. Data Extraction
     extracted_data = {
-        "Brand": product.get("brand")+"©",
-        "Product Type": pt_name
+        "Brand": product.get("brand"),
+        "Product Type": product_type
     }
     dimensions = {}
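
Reviewer note: with this hunk, `extract_title_or_error` starts returning a `(title, comment)` pair instead of a bare string (the success path is updated in a later hunk), so every caller has to unpack two values. A minimal usage sketch; `product` and `selected_pt` are whatever the caller already has in scope:

# Sketch: consuming the new (title, comment) contract.
title, comment = extract_title_or_error(product, selected_pt)
if title is None:
    logger.info(f"Hard failure: {comment}")  # e.g. no TitleMapping row for this product type
elif comment:
    logger.info(f"Built with warning: {comment}")  # mandatory attributes were missing
else:
    logger.info(f"Title OK: {title}")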
@@ -215,7 +230,7 @@ def extract_title_or_error(product, selected_pt):
         val = attr.get("attributeValue")
 
         if desc == "Capacity":
-            extracted_data[desc] = f"Capacity {val}"
+            extracted_data[desc] = f"{val} Capacity"
         elif desc in ["Door Type", "Color"]:
             extracted_data[desc] = val
         elif desc in ["Width", "Depth", "Height"]:
@@ -240,9 +255,10 @@ def extract_title_or_error(product, selected_pt):
             missing_mandatory.append(attr_name)
             continue
         final_title_parts.append(str(val))
-
+    comment = None
     if missing_mandatory:
-        return f"Could not found {', '.join(missing_mandatory)} on Product Details page"
+        comment = f"Could not find {', '.join(missing_mandatory)} on Product Details page"
+        # return f"Could not found {', '.join(missing_mandatory)} on Product Details page"
 
     # Helper function to join parts: Brand PT, Param1, Param2
     def construct_string(parts):
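
Reviewer note: `construct_string` itself is outside this diff; going by its comment, it joins parts as `Brand ProductType, Param1, Param2`. A hypothetical sketch of such a joiner, purely for orientation -- the real helper in this file may differ:

def construct_string(parts):
    # Assumed format: "<Brand> <Product Type>, <Param1>, <Param2>, ..."
    parts = [str(p) for p in parts if p]  # skip empty parts to avoid stray commas
    if len(parts) < 2:
        return ", ".join(parts)
    return ", ".join([f"{parts[0]} {parts[1]}"] + parts[2:])

# construct_string(["Nexel", "Wire Shelving", "60W", "Chrome"])
# -> "Nexel Wire Shelving, 60W, Chrome"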
@@ -253,7 +269,8 @@ def extract_title_or_error(product, selected_pt):
     current_title = construct_string(final_title_parts)
 
     # 4. Length Reduction Logic (Step-by-Step)
-    print("Current Title 1 ########",current_title,len(current_title))
+    print("Current Title 1 ######## ",current_title,len(current_title))
+    logger.info(f"Current Title 1 Initial ########,{current_title},{len(current_title)}")
     # Step 1: Change "Capacity" -> "Cap."
     if len(current_title) > 100:
         for i, part in enumerate(final_title_parts):
@@ -261,18 +278,20 @@ def extract_title_or_error(product, selected_pt):
                 final_title_parts[i] = part.replace("Capacity", "Cap.")
         current_title = construct_string(final_title_parts)
 
-    print("Current Title 2 ########",current_title,len(current_title))
+    print("Current Title 2 ########",current_title,len(current_title))
+    logger.info(f"Current Title 2 shortening Capacity ########,{current_title},{len(current_title)}")
 
     # Step 2: Shorten Product Type (e.g., Stainless Steel -> SS)
     # Step B: Dynamic Product Type Acronym
-    if len(current_title) > 100:
-        pt_part = final_title_parts[1]
-        words = pt_part.split()
-        if len(words) > 1:
-            # Takes first letter of every word in the Product Type
-            final_title_parts[1] = "".join([w[0].upper() for w in words])
-            current_title = construct_string(final_title_parts)
-            print("Current Title 3 ########",current_title,len(current_title))
+    # if len(current_title) > 100:
+    #     pt_part = final_title_parts[1]
+    #     words = pt_part.split()
+    #     if len(words) > 1:
+    #         # Takes first letter of every word in the Product Type
+    #         final_title_parts[1] = "".join([w[0].upper() for w in words])
+    #         current_title = construct_string(final_title_parts)
+    #         print("Current Title 3 ########",current_title,len(current_title))
+    #         logger.info(f"Current Title 3 change the title ########,{current_title},{len(current_title)}")
     # Step 3: Remove spaces from attributes starting from the back
     # Brand (0) and Product Type (1) are skipped
     if len(current_title) > 100:
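
Reviewer note: a self-contained walk-through of the reduction pipeline on dummy data may help review. Step 1 rewrites `Capacity` to `Cap.`, Step 2 (the acronym pass) is now commented out, and Step 3 strips spaces from trailing attributes until the title fits. All values below are invented; the joiner is the same assumed sketch as above:

# Worked example (dummy data) of the >100-char reduction steps.
def construct_string(parts):
    return ", ".join([f"{parts[0]} {parts[1]}"] + [str(p) for p in parts[2:]])

parts = ["Global Industrial", "Reach-In Refrigerator", "39 Cu. Ft. Total Capacity",
         "Solid Door", "White", '26"W x 33"D x 83"H']
title = construct_string(parts)            # 105 chars -> too long

# Step 1: "Capacity" -> "Cap."
if len(title) > 100:
    parts = [p.replace("Capacity", "Cap.") for p in parts]
    title = construct_string(parts)        # 101 chars -> still too long

# Step 3: remove spaces from attributes, starting from the back;
# Brand (index 0) and Product Type (index 1) are skipped.
if len(title) > 100:
    for i in range(len(parts) - 1, 1, -1):
        parts[i] = parts[i].replace(" ", "")
        title = construct_string(parts)
        if len(title) <= 100:
            break                          # 97 chars -> fits

print(title, len(title))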
@@ -283,20 +302,25 @@ def extract_title_or_error(product, selected_pt):
             final_title_parts[i] = final_title_parts[i].replace(" ", "")
             current_title = construct_string(final_title_parts)
     print("Current Title 4 ########",current_title,len(current_title))
-    return current_title
+    logger.info(f"Current Title 4 Removing space ########,{current_title},{len(current_title)}")
+    return current_title, comment
 
 def construct_dynamic_title(raw_data,selected_pt):
     try:
         product = raw_data.get("props", {}).get("pageProps", {}).get("product", {})
-        if not product: return "Product data not found"
-
-        return extract_title_or_error(product,selected_pt).strip()
+        if not product: return None, "Product data not found"
+        logger.info("IN construct_dynamic_title")
+        return extract_title_or_error(product,selected_pt)
     except Exception:
-        return "Could not found attribute name on product details page"
+        return None, "Could not find attribute name on product details page"
 
 @login_required
 def title_creator_view(request):
     if request.method == 'POST' and request.FILES.get('file'):
+        fresh_token = get_fresh_token()
+        logger.info(f"fresh_token Value: {fresh_token}")
+        if not fresh_token:
+            fresh_token = "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA"
         excel_file = request.FILES['file']
         selected_pt = request.POST.get('product_type')
         fs = FileSystemStorage()
@@ -310,13 +334,15 @@ def title_creator_view(request):
         # 2. Add the NEW COLUMN if it doesn't exist
         if 'New_Generated_Title' not in df.columns:
             df['New_Generated_Title'] = ""
+        if 'Comment' not in df.columns:
+            df['Comment'] = ""
 
         headers = {"User-Agent": "Mozilla/5.0"}
         results_for_ui = []
         # Specific Headers for the Item# API
         api_headers = {
             "accept": "application/json, text/plain, */*",
-            "authorization": "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA",
+            "authorization": fresh_token,  # "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA",
             "client_id": "GEC",
             "referer": "https://www.globalindustrial.com/"
         }
@@ -327,6 +353,7 @@ def title_creator_view(request):
             item_number = row.get('Item#')
             new_title = ""
             final_url = None
+            comment = ""
 
             # Step 1: Resolve the URL
             if pd.notna(url) and str(url).startswith('http'):
@@ -351,21 +378,23 @@ def title_creator_view(request):
 
                     if script_tag:
                         raw_data = json.loads(script_tag.string)
-                        new_title = construct_dynamic_title(raw_data,selected_pt)
+                        new_title, comment = construct_dynamic_title(raw_data,selected_pt)
                     else:
-                        new_title = "Could not found attribute name on product details page"
+                        new_title, comment = "Could not find attribute name on product details page", None
                 except:
-                    new_title = "Could not found attribute name on product details page"
+                    new_title, comment = "Could not find attribute name on product details page", None
             else:
-                new_title = "URL Missing"
+                new_title, comment = "URL Missing", None
 
             # Update the DataFrame column for this row
             df.at[index, 'New_Generated_Title'] = new_title
+            df.at[index, 'Comment'] = comment
 
             results_for_ui.append({
                 "id" : index + 1,
                 "url": final_url,
                 "new_title": new_title,
+                "comment": comment,
                 "status": True
             })
             # Generates a random float between 3.0 and 7.0
@@ -391,38 +420,40 @@ def title_creator_view(request):
         return render(request, 'title_creator_index.html', {'product_types': product_types})
     # return render(request, 'title_creator_index.html')
 
-async def get_fresh_token(scraper):
-    """Hits the homepage once to extract the latest Bearer token."""
+def get_fresh_token():
+    """Hits the homepage to extract the latest Bearer token."""
     base_url = "https://www.globalindustrial.com"
     try:
-        # One-time hit to the base URL
-        response = scraper.get(base_url, timeout=15)
+        # Use a session to persist cookies
+        session = requests.Session()
+        response = session.get(base_url, timeout=15, headers={"User-Agent": "Mozilla/5.0"})
 
-        # 1. Check Cookies for 'Authorization'
-        token = scraper.cookies.get('Authorization')
+        # 1. Try Cookies
+        token = session.cookies.get('Authorization')
         if token:
-            return token.replace('Bearer ', '').strip()
+            return token if "Bearer" in token else f"Bearer {token}"
 
-        # 2. Check __NEXT_DATA__ script in HTML
+        # 2. Try __NEXT_DATA__
         soup = BeautifulSoup(response.content, 'html.parser')
         script_tag = soup.find('script', id='__NEXT_DATA__')
         if script_tag:
             data = json.loads(script_tag.string)
-            # Standard Next.js path for auth tokens
             token = data.get('props', {}).get('pageProps', {}).get('token')
             if token:
-                return token
-
+                return f"Bearer {token}"
     except Exception as e:
         print(f"Token retrieval failed: {e}")
-
-    # Fallback to your hardcoded token if extraction fails
-    return "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA"
+    return None
 
-async def process_excel_task(file_path, selected_pt, task_id):
+def process_excel_task(file_path, selected_pt, task_id):
+    print("process excel task started.")
     # Retrieve the task record from the database
-    scraper = cloudscraper.create_scraper() # This replaces requests.get
+    # scraper = cloudscraper.create_scraper() # This replaces requests.get
     task = ProcessingTask.objects.get(task_id=task_id)
+    fresh_token = get_fresh_token()
+    logger.info(f"fresh_token Value: {fresh_token}")
+    if not fresh_token:
+        fresh_token = "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA"
 
     try:
        # 1. Read Excel
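
Reviewer note: the now-synchronous `get_fresh_token()` returns a ready-to-use `"Bearer ..."` string or `None`, which is why both call sites guard with a hardcoded DEV-token fallback. The pattern in isolation:

# Sketch: the call-site pattern used in title_creator_view / process_excel_task.
token = get_fresh_token()
if not token:
    # Cookie and __NEXT_DATA__ extraction can both come up empty
    # (bot detection, page-layout changes), so keep a known-good fallback.
    token = "Bearer <hardcoded DEV token>"   # placeholder for the real value above
api_headers = {
    "accept": "application/json, text/plain, */*",
    "authorization": token,
    "client_id": "GEC",
    "referer": "https://www.globalindustrial.com/",
}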
@@ -431,21 +462,25 @@ async def process_excel_task(file_path, selected_pt, task_id):
         # 2. Add the NEW COLUMN if it doesn't exist
         if 'New_Generated_Title' not in df.columns:
             df['New_Generated_Title'] = ""
+        if 'Comment' not in df.columns:
+            df['Comment'] = ""
 
         headers = {"User-Agent": "Mozilla/5.0"}
         # dynamic_token = await get_fresh_token(scraper)
         # Specific Headers for the Item# API
         api_headers = {
             "accept": "application/json, text/plain, */*",
-            "authorization": "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA", #f"Bearer {dynamic_token}",
+            "authorization": fresh_token,  # "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA", #f"Bearer {dynamic_token}",
             "client_id": "GEC",
             "referer": "https://www.globalindustrial.com/"
         }
 
         # 3. Process each row
         for index, row in df.iterrows():
+            logger.info(f"STARTED: {index}")
             url = row.get('URL')
             new_title = ""
+            comment = ""
             item_number = row.get('Item#')
             final_url = None
             # Step 1: Resolve the URL
@@ -460,7 +495,7 @@ async def process_excel_task(file_path, selected_pt, task_id):
                     data = api_resp.json()
                     final_url = data.get('exactMatch', {}).get('canonicalLink')
                 except Exception as e:
-                    new_title = f"API Error for Item# {item_number}"
+                    new_title, comment = f"API Error for Item# {item_number}", None
 
             if pd.notna(final_url):
                 try:
@@ -475,24 +510,26 @@ async def process_excel_task(file_path, selected_pt, task_id):
                     try:
                         raw_data = json.loads(script_tag.string)
                         # Calling your dynamic title helper
-                        new_title = construct_dynamic_title(raw_data, selected_pt)
+                        new_title, comment = construct_dynamic_title(raw_data, selected_pt)
                     except Exception:
-                        new_title = "Data Parsing Error"
+                        new_title, comment = "Data Parsing Error", None
                 else:
-                    new_title = "Could not found attribute name on product details page"
+                    new_title, comment = "Could not find attribute name on product details page", None
             else:
-                new_title = f"HTTP Error: {resp.status_code}"
+                new_title, comment = f"HTTP Error: {resp.status_code}", None
         except Exception:
-            new_title = "Request Failed (Timeout/Connection)"
+            new_title, comment = "Request Failed (Timeout/Connection)", None
     else:
-        new_title = "URL Missing"
+        new_title, comment = "URL Missing", None
 
     # Update the DataFrame
     df.at[index, 'New_Generated_Title'] = new_title
+    df.at[index, 'Comment'] = comment
 
     # Optional: Sleep to prevent getting blocked by the server
     # Generates a random float between 3.0 and 7.0
     time.sleep(random.uniform(3, 7))
+    logger.info(f"ENDED: {index}")
     # time.sleep(1)
@@ -509,7 +546,9 @@ async def process_excel_task(file_path, selected_pt, task_id):
         task.status = 'COMPLETED'
         # Construct the URL for the frontend to download
         task.download_url = f"{settings.MEDIA_URL}{output_filename}"
+        task.completed_at = timezone.now()  # Record the completion time (timezone-aware)
        task.save()
+        print("process excel task ended.")
 
     except Exception as e:
         print(f"Critical Task Failure: {e}")
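
Reviewer note: with `USE_TZ = True`, `timezone.now()` returns an aware UTC datetime; it only reads as IST after conversion against `settings.TIME_ZONE`, which this diff does not show. A small sketch of the explicit conversion, assuming `TIME_ZONE = "Asia/Kolkata"`:

# Sketch: format the aware timestamp in local time (assumes TIME_ZONE = "Asia/Kolkata").
from django.utils import timezone

completed = timezone.now()              # aware UTC when USE_TZ=True
local = timezone.localtime(completed)   # converted using settings.TIME_ZONE
print(local.strftime("%d %b %Y, %I:%M %p"))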
@@ -524,6 +563,7 @@ async def process_excel_task(file_path, selected_pt, task_id):
 @login_required
 def title_creator_async_view(request):
     if request.method == 'POST' and request.FILES.get('file'):
+
         excel_file = request.FILES['file']
         selected_pt = request.POST.get('product_type')
@@ -582,6 +622,54 @@ def get_title_creator_tasks_json(request):
             'filename': t.original_filename or "Unknown File",
             'status': t.status,
             'url': t.download_url,
-            'date': t.created_at.strftime("%d %b %Y, %I:%M %p")
+            'date': t.created_at.strftime("%d %b %Y, %I:%M %p"),
+            # Use a conditional (ternary) expression to handle the null case
+            'completed_at': t.completed_at.strftime("%d %b %Y, %I:%M %p") if t.completed_at else ""
+            # 'completed_at': t.completed_at.strftime("%d %b %Y, %I:%M %p")
         })
-    return JsonResponse(data, safe=False)
+    return JsonResponse(data, safe=False)
+
+
+class TokenFetcherAPI(APIView):
+    def get(self, request):
+        token = fetch_global_industrial_token()
+
+        if token:
+            return JsonResponse({
+                "status": "success",
+                "token": token
+            }, status=status.HTTP_200_OK)
+
+        return JsonResponse({
+            "status": "error",
+            "message": "Could not retrieve token"
+        }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
+
+
+
+def fetch_global_industrial_token():
+    """Scrapes a fresh Authorization token from the Global Industrial homepage."""
+    base_url = "https://www.globalindustrial.com"
+    # Using cloudscraper to handle potential bot detection
+    scraper = cloudscraper.create_scraper()
+
+    try:
+        response = scraper.get(base_url, timeout=15)
+
+        # 1. Check Cookies
+        token = scraper.cookies.get('Authorization')
+        if token:
+            return token.replace('Bearer ', '').strip()
+
+        # 2. Check __NEXT_DATA__
+        soup = BeautifulSoup(response.content, 'html.parser')
+        script_tag = soup.find('script', id='__NEXT_DATA__')
+        if script_tag:
+            data = json.loads(script_tag.string)
+            token = data.get('props', {}).get('pageProps', {}).get('token')
+            if token:
+                return token
+
+    except Exception:
+        return None
+    return None
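
Reviewer note: `TokenFetcherAPI` still needs a route before it is reachable. A minimal urls.py sketch, where the path string, name, and import location are assumptions rather than part of this diff:

# urls.py (sketch) -- path, name, and import location are assumptions.
from django.urls import path
from .views import TokenFetcherAPI

urlpatterns = [
    # GET /api/token/ -> {"status": "success", "token": "..."} or HTTP 500 on failure
    path("api/token/", TokenFetcherAPI.as_view(), name="token-fetcher"),
]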