|
|
@@ -17,7 +17,7 @@ from django.contrib.auth import authenticate, login, logout
|
|
|
# from django.contrib.auth.decorators import login_required
|
|
|
from .decorators import login_required
|
|
|
from django.contrib.auth.hashers import make_password
|
|
|
-
|
|
|
+import random
|
|
|
|
|
|
# To login
|
|
|
def login_view(request):
|
|
|
@@ -227,7 +227,7 @@ def extract_title_or_error(product, selected_pt):
|
|
|
w = dimensions["Width"].replace(" in", "").strip()
|
|
|
d = dimensions["Depth"].replace(" in", "").strip()
|
|
|
h = dimensions["Height"].replace(" in", "").strip()
|
|
|
- extracted_data["Dimensions"] = f'{w}"w x {d}"d x {h}"h'
|
|
|
+ extracted_data["Dimensions"] = f'{w}"W x {d}"D x {h}"H'
|
|
|
|
|
|
# 3. Build Title Parts
|
|
|
final_title_parts = []
|
|
|
@@ -253,7 +253,7 @@ def extract_title_or_error(product, selected_pt):
|
|
|
current_title = construct_string(final_title_parts)
|
|
|
|
|
|
# 4. Length Reduction Logic (Step-by-Step)
|
|
|
-
|
|
|
+ print("Current Title 1 ########",current_title,len(current_title))
|
|
|
# Step 1: Change "Capacity" -> "Cap."
|
|
|
if len(current_title) > 100:
|
|
|
for i, part in enumerate(final_title_parts):
|
|
|
@@ -261,6 +261,8 @@ def extract_title_or_error(product, selected_pt):
|
|
|
final_title_parts[i] = part.replace("Capacity", "Cap.")
|
|
|
current_title = construct_string(final_title_parts)
|
|
|
|
|
|
+ print("Current Title 2 ########",current_title,len(current_title))
|
|
|
+
|
|
|
# Step 2: Shorten Product Type (e.g., Stainless Steel -> SS)
|
|
|
# Step B: Dynamic Product Type Acronym
|
|
|
if len(current_title) > 100:
|
|
|
@@ -270,7 +272,7 @@ def extract_title_or_error(product, selected_pt):
|
|
|
# Takes first letter of every word in the Product Type
|
|
|
final_title_parts[1] = "".join([w[0].upper() for w in words])
|
|
|
current_title = construct_string(final_title_parts)
|
|
|
-
|
|
|
+ print("Current Title 3 ########",current_title,len(current_title))
|
|
|
# Step 3: Remove spaces from attributes starting from the back
|
|
|
# Brand (0) and Product Type (1) are skipped
|
|
|
if len(current_title) > 100:
|
|
|
@@ -280,7 +282,7 @@ def extract_title_or_error(product, selected_pt):
|
|
|
# Remove white spaces from the current attribute part
|
|
|
final_title_parts[i] = final_title_parts[i].replace(" ", "")
|
|
|
current_title = construct_string(final_title_parts)
|
|
|
-
|
|
|
+ print("Current Title 4 ########",current_title,len(current_title))
|
|
|
return current_title
|
|
|
|
|
|
def construct_dynamic_title(raw_data,selected_pt):
|
|
|
@@ -311,15 +313,39 @@ def title_creator_view(request):
|
|
|
|
|
|
headers = {"User-Agent": "Mozilla/5.0"}
|
|
|
results_for_ui = []
|
|
|
+ # Specific Headers for the Item# API
|
|
|
+ api_headers = {
|
|
|
+ "accept": "application/json, text/plain, */*",
|
|
|
+ "authorization": "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA",
|
|
|
+ "client_id": "GEC",
|
|
|
+ "referer": "https://www.globalindustrial.com/"
|
|
|
+ }
|
|
|
|
|
|
# 3. Process each row
|
|
|
for index, row in df.iterrows():
|
|
|
url = row.get('URL') # Assumes your excel has a 'URL' column
|
|
|
+ item_number = row.get('Item#')
|
|
|
new_title = ""
|
|
|
+ final_url = None
|
|
|
+
|
|
|
+ # Step 1: Resolve the URL
|
|
|
+ if pd.notna(url) and str(url).startswith('http'):
|
|
|
+ final_url = url
|
|
|
+ elif pd.notna(item_number):
|
|
|
+ # Call API to get URL from Item#
|
|
|
+ api_url = f"https://www.globalindustrial.com/catalogApis/catalog/autosuggest?key={item_number}&features=true"
|
|
|
+ try:
|
|
|
+ api_resp = requests.get(api_url, headers=api_headers, timeout=10)
|
|
|
+ if api_resp.status_code == 200:
|
|
|
+ data = api_resp.json()
|
|
|
+ final_url = data.get('exactMatch', {}).get('canonicalLink')
|
|
|
+ except Exception as e:
|
|
|
+ new_title = f"API Error for Item# {item_number}"
|
|
|
+
|
|
|
|
|
|
- if pd.notna(url):
|
|
|
+ if pd.notna(final_url):
|
|
|
try:
|
|
|
- resp = requests.get(url, headers=headers, timeout=10)
|
|
|
+ resp = requests.get(final_url, headers=headers, timeout=10)
|
|
|
soup = BeautifulSoup(resp.content, 'html.parser')
|
|
|
script_tag = soup.find('script', id='__NEXT_DATA__')
|
|
|
|
|
|
@@ -338,11 +364,13 @@ def title_creator_view(request):
|
|
|
|
|
|
results_for_ui.append({
|
|
|
"id" : index + 1,
|
|
|
- "url": url,
|
|
|
+ "url": final_url,
|
|
|
"new_title": new_title,
|
|
|
"status": True
|
|
|
})
|
|
|
- time.sleep(1) # Safety delay
|
|
|
+ # Sleep a random 3.0–7.0 s between requests to mimic human pacing and avoid rate limiting
|
|
|
+ time.sleep(random.uniform(3, 7))
|
|
|
+ # time.sleep(1) # Safety delay
|
|
|
|
|
|
# 4. Save the modified Excel to a new path
|
|
|
output_filename = f"processed_{excel_file.name}"
|
|
|
@@ -377,17 +405,39 @@ def process_excel_task(file_path, selected_pt, task_id):
|
|
|
df['New_Generated_Title'] = ""
|
|
|
|
|
|
headers = {"User-Agent": "Mozilla/5.0"}
|
|
|
+ # Specific Headers for the Item# API
|
|
|
+ api_headers = {
|
|
|
+ "accept": "application/json, text/plain, */*",
|
|
|
+ "authorization": "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJzdWIiOiJERVYifQ.uOFB7h7_Aw6jbA1HSqVJ44tKMO7E1ljz1kV_JddeKL64YCOH57-l1ZX2Lly-Jnhdnxk3xMAeW5FawAgymEaMKA",
|
|
|
+ "client_id": "GEC",
|
|
|
+ "referer": "https://www.globalindustrial.com/"
|
|
|
+ }
|
|
|
|
|
|
# 3. Process each row
|
|
|
for index, row in df.iterrows():
|
|
|
url = row.get('URL')
|
|
|
new_title = ""
|
|
|
-
|
|
|
- if pd.notna(url):
|
|
|
+ item_number = row.get('Item#')
|
|
|
+ final_url = None
|
|
|
+ # Step 1: Resolve the URL
|
|
|
+ if pd.notna(url) and str(url).startswith('http'):
|
|
|
+ final_url = url
|
|
|
+ elif pd.notna(item_number):
|
|
|
+ # Call API to get URL from Item#
|
|
|
+ api_url = f"https://www.globalindustrial.com/catalogApis/catalog/autosuggest?key={item_number}&features=true"
|
|
|
+ try:
|
|
|
+ api_resp = requests.get(api_url, headers=api_headers, timeout=10)
|
|
|
+ if api_resp.status_code == 200:
|
|
|
+ data = api_resp.json()
|
|
|
+ final_url = data.get('exactMatch', {}).get('canonicalLink')
|
|
|
+ except Exception as e:
|
|
|
+ new_title = f"API Error for Item# {item_number}"
|
|
|
+
|
|
|
+ if pd.notna(final_url):
|
|
|
try:
|
|
|
# Scraping logic
|
|
|
# resp = scraper.get(url, timeout=15)
|
|
|
- resp = requests.get(url, headers=headers, timeout=10)
|
|
|
+ resp = requests.get(final_url, headers=headers, timeout=10)
|
|
|
if resp.status_code == 200:
|
|
|
soup = BeautifulSoup(resp.content, 'html.parser')
|
|
|
script_tag = soup.find('script', id='__NEXT_DATA__')
|
|
|
@@ -412,7 +462,9 @@ def process_excel_task(file_path, selected_pt, task_id):
|
|
|
df.at[index, 'New_Generated_Title'] = new_title
|
|
|
|
|
|
# Optional: Sleep to prevent getting blocked by the server
|
|
|
- time.sleep(1)
|
|
|
+ # Sleep a random 3.0–7.0 s between requests to mimic human pacing and avoid rate limiting
|
|
|
+ time.sleep(random.uniform(3, 7))
|
|
|
+ # time.sleep(1)
|
|
|
|
|
|
# 4. Save the modified Excel to the MEDIA folder
|
|
|
output_filename = f"completed_{task_id}_{task.original_filename}"
|