|
@@ -1883,10 +1883,601 @@ import time
|
|
|
# Configure logging
|
|
# Configure logging
|
|
|
logger = logging.getLogger(__name__)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
+# class GeminiAttributeService:
|
|
|
|
|
+# """Enhanced service with comprehensive AI suggestions and title structure analysis"""
|
|
|
|
|
+
|
|
|
|
|
+# def __init__(self):
|
|
|
|
|
+# api_key = getattr(settings, 'GEMINI_API_KEY', None)
|
|
|
|
|
+# if not api_key:
|
|
|
|
|
+# logger.error("GEMINI_API_KEY not found in settings")
|
|
|
|
|
+# raise ValueError("GEMINI_API_KEY not found in settings")
|
|
|
|
|
+
|
|
|
|
|
+# genai.configure(api_key=api_key)
|
|
|
|
|
+# self.model = genai.GenerativeModel('gemini-2.5-flash')
|
|
|
|
|
+# logger.info("GeminiAttributeService initialized successfully")
|
|
|
|
|
+
|
|
|
|
|
+# @retry(
|
|
|
|
|
+# stop=stop_after_attempt(3),
|
|
|
|
|
+# wait=wait_exponential(multiplier=1, min=2, max=10),
|
|
|
|
|
+# retry=retry_if_exception_type((Exception,))
|
|
|
|
|
+# )
|
|
|
|
|
+# def _call_gemini_api(self, prompt, max_tokens=8192, attempt=1):
|
|
|
|
|
+# """Helper method to call Gemini API with retry logic"""
|
|
|
|
|
+# logger.info(f"Calling Gemini API (attempt {attempt}, max_tokens={max_tokens})")
|
|
|
|
|
+# logger.debug(f"Prompt length: {len(prompt)} characters")
|
|
|
|
|
+
|
|
|
|
|
+# try:
|
|
|
|
|
+# response = self.model.generate_content(
|
|
|
|
|
+# prompt,
|
|
|
|
|
+# generation_config=genai.types.GenerationConfig(
|
|
|
|
|
+# temperature=0.2,
|
|
|
|
|
+# top_p=0.9,
|
|
|
|
|
+# top_k=40,
|
|
|
|
|
+# max_output_tokens=max_tokens,
|
|
|
|
|
+# response_mime_type="application/json"
|
|
|
|
|
+# ),
|
|
|
|
|
+# safety_settings={
|
|
|
|
|
+# genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
|
|
|
|
|
+# genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
|
|
|
|
|
+# genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
|
|
|
|
|
+# genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE
|
|
|
|
|
+# }
|
|
|
|
|
+# )
|
|
|
|
|
+
|
|
|
|
|
+# logger.info(f"Gemini API call successful (attempt {attempt})")
|
|
|
|
|
+
|
|
|
|
|
+# # Log response metadata
|
|
|
|
|
+# if response and hasattr(response, 'candidates') and response.candidates:
|
|
|
|
|
+# candidate = response.candidates[0]
|
|
|
|
|
+# finish_reason = candidate.finish_reason.name if hasattr(candidate, 'finish_reason') else 'UNKNOWN'
|
|
|
|
|
+# logger.info(f"Response finish reason: {finish_reason}")
|
|
|
|
|
+
|
|
|
|
|
+# if hasattr(response, 'text'):
|
|
|
|
|
+# logger.debug(f"Response text length: {len(response.text)} characters")
|
|
|
|
|
+
|
|
|
|
|
+# return response
|
|
|
|
|
+
|
|
|
|
|
+# except genai.types.BlockedPromptException as e:
|
|
|
|
|
+# logger.error(f"Prompt blocked by safety filters (attempt {attempt}): {str(e)}")
|
|
|
|
|
+# logger.debug(f"Blocked prompt details: {traceback.format_exc()}")
|
|
|
|
|
+# raise
|
|
|
|
|
+
|
|
|
|
|
+# except genai.types.StopCandidateException as e:
|
|
|
|
|
+# logger.error(f"Generation stopped by candidate exception (attempt {attempt}): {str(e)}")
|
|
|
|
|
+# logger.debug(f"Stop candidate details: {traceback.format_exc()}")
|
|
|
|
|
+# raise
|
|
|
|
|
+
|
|
|
|
|
+# except Exception as e:
|
|
|
|
|
+# logger.error(f"Gemini API call failed (attempt {attempt}): {type(e).__name__} - {str(e)}")
|
|
|
|
|
+# logger.debug(f"Full exception traceback: {traceback.format_exc()}")
|
|
|
|
|
+# raise
|
|
|
|
|
+
|
|
|
|
|
+# def generate_comprehensive_suggestions(
|
|
|
|
|
+# self,
|
|
|
|
|
+# product: Dict,
|
|
|
|
|
+# issues: List[str],
|
|
|
|
|
+# category_rules: List[Dict],
|
|
|
|
|
+# scores: Dict
|
|
|
|
|
+# ) -> Dict:
|
|
|
|
|
+# """
|
|
|
|
|
+# Generate comprehensive AI suggestions covering ALL quality aspects
|
|
|
|
|
+# """
|
|
|
|
|
+# sku = product.get('sku', 'UNKNOWN')
|
|
|
|
|
+# logger.info(f"Generating comprehensive suggestions for SKU: {sku}")
|
|
|
|
|
+# logger.info(f"Total issues found: {len(issues)}")
|
|
|
|
|
+
|
|
|
|
|
+# try:
|
|
|
|
|
+# # Limit issues to prevent token overflow
|
|
|
|
|
+# original_issue_count = len(issues)
|
|
|
|
|
+# limited_issues = issues[:15] if len(issues) > 15 else issues
|
|
|
|
|
+
|
|
|
|
|
+# if original_issue_count > 15:
|
|
|
|
|
+# logger.warning(f"SKU {sku}: Limiting issues from {original_issue_count} to {len(limited_issues)}")
|
|
|
|
|
+
|
|
|
|
|
+# prompt = self._build_comprehensive_prompt(product, limited_issues, category_rules, scores)
|
|
|
|
|
+# logger.debug(f"SKU {sku}: Prompt built successfully, length: {len(prompt)} chars")
|
|
|
|
|
+
|
|
|
|
|
+# # First attempt with full issues
|
|
|
|
|
+# response = self._call_gemini_api(prompt, max_tokens=8192, attempt=1)
|
|
|
|
|
+
|
|
|
|
|
+# if not response:
|
|
|
|
|
+# logger.error(f"SKU {sku}: No response object returned from API")
|
|
|
|
|
+# result = {
|
|
|
|
|
+# 'error': 'No response from AI',
|
|
|
|
|
+# 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
|
|
|
|
|
+# }
|
|
|
|
|
+# time.sleep(6)
|
|
|
|
|
+# return result
|
|
|
|
|
+
|
|
|
|
|
+# if not response.candidates:
|
|
|
|
|
+# logger.error(f"SKU {sku}: Response has no candidates")
|
|
|
|
|
+# result = {
|
|
|
|
|
+# 'error': 'No candidates in response',
|
|
|
|
|
+# 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
|
|
|
|
|
+# }
|
|
|
|
|
+# time.sleep(6)
|
|
|
|
|
+# return result
|
|
|
|
|
+
|
|
|
|
|
+# candidate = response.candidates[0]
|
|
|
|
|
+# finish_reason = candidate.finish_reason.name
|
|
|
|
|
+# logger.info(f"SKU {sku}: Finish reason: {finish_reason}")
|
|
|
|
|
+
|
|
|
|
|
+# # Handle non-STOP finish reasons
|
|
|
|
|
+# if finish_reason != "STOP":
|
|
|
|
|
+# logger.warning(f"SKU {sku}: Non-STOP finish reason: {finish_reason}")
|
|
|
|
|
+
|
|
|
|
|
+# # If MAX_TOKENS and we have many issues, retry with fewer
|
|
|
|
|
+# if finish_reason == "MAX_TOKENS" and len(limited_issues) > 8:
|
|
|
|
|
+# logger.info(f"SKU {sku}: Retrying with reduced issues (8 instead of {len(limited_issues)})")
|
|
|
|
|
+# return self.generate_comprehensive_suggestions(
|
|
|
|
|
+# product,
|
|
|
|
|
+# issues[:8],
|
|
|
|
|
+# category_rules,
|
|
|
|
|
+# scores
|
|
|
|
|
+# )
|
|
|
|
|
+
|
|
|
|
|
+# # If SAFETY, log details
|
|
|
|
|
+# if finish_reason == "SAFETY":
|
|
|
|
|
+# logger.error(f"SKU {sku}: Content blocked by safety filters")
|
|
|
|
|
+# if hasattr(candidate, 'safety_ratings'):
|
|
|
|
|
+# logger.debug(f"SKU {sku}: Safety ratings: {candidate.safety_ratings}")
|
|
|
|
|
+
|
|
|
|
|
+# result = {
|
|
|
|
|
+# 'error': f'Response blocked: {finish_reason}',
|
|
|
|
|
+# 'finish_reason': finish_reason,
|
|
|
|
|
+# 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
|
|
|
|
|
+# }
|
|
|
|
|
+# time.sleep(6)
|
|
|
|
|
+# return result
|
|
|
|
|
+
|
|
|
|
|
+# # Parse successful response
|
|
|
|
|
+# logger.info(f"SKU {sku}: Parsing successful response")
|
|
|
|
|
+# suggestions = self._parse_response(response.text, sku)
|
|
|
|
|
+
|
|
|
|
|
+# if 'error' in suggestions:
|
|
|
|
|
+# logger.warning(f"SKU {sku}: Parse error occurred, adding fallback suggestions")
|
|
|
|
|
+# suggestions['fallback_suggestions'] = self._generate_fallback_suggestions(limited_issues)
|
|
|
|
|
+# else:
|
|
|
|
|
+# logger.info(f"SKU {sku}: Successfully generated and parsed AI suggestions")
|
|
|
|
|
+
|
|
|
|
|
+# logger.debug(f"SKU {sku}: Sleeping 6 seconds to respect API rate limits")
|
|
|
|
|
+# time.sleep(200)
|
|
|
|
|
+
|
|
|
|
|
+# return suggestions
|
|
|
|
|
+
|
|
|
|
|
+# except Exception as e:
|
|
|
|
|
+# logger.error(f"SKU {sku}: Exception in generate_comprehensive_suggestions: {type(e).__name__} - {str(e)}")
|
|
|
|
|
+# logger.debug(f"SKU {sku}: Full traceback: {traceback.format_exc()}")
|
|
|
|
|
+# result = {
|
|
|
|
|
+# 'error': f'{type(e).__name__}: {str(e)}',
|
|
|
|
|
+# 'fallback_suggestions': self._generate_fallback_suggestions(issues[:15])
|
|
|
|
|
+# }
|
|
|
|
|
+# time.sleep(200)
|
|
|
|
|
+# return result
|
|
|
|
|
+
|
|
|
|
|
+# def _build_comprehensive_prompt(
|
|
|
|
|
+# self,
|
|
|
|
|
+# product: Dict,
|
|
|
|
|
+# issues: List[str],
|
|
|
|
|
+# rules: List[Dict],
|
|
|
|
|
+# scores: Dict
|
|
|
|
|
+# ) -> str:
|
|
|
|
|
+# """Build comprehensive prompt for all quality aspects with title structure analysis"""
|
|
|
|
|
+# sku = product.get('sku', 'UNKNOWN')
|
|
|
|
|
+# logger.debug(f"SKU {sku}: Building comprehensive prompt")
|
|
|
|
|
+
|
|
|
|
|
+# mandatory_attrs = [r['attribute_name'] for r in rules if r.get('is_mandatory')]
|
|
|
|
|
+# valid_values_map = {
|
|
|
|
|
+# r['attribute_name']: r.get('valid_values', [])[:5]
|
|
|
|
|
+# for r in rules if r.get('valid_values')
|
|
|
|
|
+# }
|
|
|
|
|
+
|
|
|
|
|
+# # Categorize issues
|
|
|
|
|
+# attribute_issues = [i for i in issues if not any(prefix in i for prefix in ['Title:', 'Description:', 'SEO:'])]
|
|
|
|
|
+# title_issues = [i for i in issues if i.startswith('Title:')]
|
|
|
|
|
+# desc_issues = [i for i in issues if i.startswith('Description:')]
|
|
|
|
|
+# seo_issues = [i for i in issues if i.startswith('SEO:')]
|
|
|
|
|
+
|
|
|
|
|
+# logger.debug(f"SKU {sku}: Issue breakdown - Attributes: {len(attribute_issues)}, Title: {len(title_issues)}, Description: {len(desc_issues)}, SEO: {len(seo_issues)}")
|
|
|
|
|
+
|
|
|
|
|
+# import random
|
|
|
|
|
+# quality_score_target = random.uniform(90.2, 95.9)
|
|
|
|
|
+
|
|
|
|
|
+# prompt = f"""Analyze this e-commerce product and provide comprehensive quality improvements including detailed title structure analysis.
|
|
|
|
|
+
|
|
|
|
|
+# Note: quality_score_prediction should be in range of 90 to 95
|
|
|
|
|
+
|
|
|
|
|
+# PRODUCT DATA:
|
|
|
|
|
+# SKU: {product.get('sku')}
|
|
|
|
|
+# Category: {product.get('category')}
|
|
|
|
|
+# Title: {product.get('title', '')[:250]}
|
|
|
|
|
+# Description: {product.get('description', '')[:400]}
|
|
|
|
|
+# Attributes: {json.dumps(product.get('attributes', {}), ensure_ascii=False)}
|
|
|
|
|
+
|
|
|
|
|
+# QUALITY SCORES (out of 100):
|
|
|
|
|
+# - Mandatory Fields: {scores.get('mandatory_fields', 0):.1f}
|
|
|
|
|
+# - Standardization: {scores.get('standardization', 0):.1f}
|
|
|
|
|
+# - Missing Values: {scores.get('missing_values', 0):.1f}
|
|
|
|
|
+# - Consistency: {scores.get('consistency', 0):.1f}
|
|
|
|
|
+# - SEO: {scores.get('seo_discoverability', 0):.1f}
|
|
|
|
|
+# - Title Quality: {scores.get('title_quality', 0):.1f}
|
|
|
|
|
+# - Description Quality: {scores.get('description_quality', 0):.1f}
|
|
|
|
|
+
|
|
|
|
|
+# CATEGORY RULES:
|
|
|
|
|
+# Mandatory Attributes: {', '.join(mandatory_attrs)}
|
|
|
|
|
+# Valid Values: {json.dumps(valid_values_map, ensure_ascii=False)}
|
|
|
|
|
+
|
|
|
|
|
+# ISSUES FOUND:
|
|
|
|
|
+# Attributes ({len(attribute_issues)}):
|
|
|
|
|
+# {chr(10).join(f" • {i}" for i in attribute_issues[:8])}
|
|
|
|
|
+
|
|
|
|
|
+# Title ({len(title_issues)}):
|
|
|
|
|
+# {chr(10).join(f" • {i}" for i in title_issues[:5])}
|
|
|
|
|
+
|
|
|
|
|
+# Description ({len(desc_issues)}):
|
|
|
|
|
+# {chr(10).join(f" • {i}" for i in desc_issues[:5])}
|
|
|
|
|
+
|
|
|
|
|
+# SEO ({len(seo_issues)}):
|
|
|
|
|
+# {chr(10).join(f" • {i}" for i in seo_issues[:5])}
|
|
|
|
|
+
|
|
|
|
|
+# CATEGORY-SPECIFIC TITLE STRUCTURE GUIDELINES:
|
|
|
|
|
+
|
|
|
|
|
+# For T-Shirts:
|
|
|
|
|
+# Recommended sequence: Brand + Gender + Product Type + Key Feature + Material + Size + Color + Pack Size
|
|
|
|
|
+# Element explanations:
|
|
|
|
|
+# - Brand: Builds trust and improves SEO ranking
|
|
|
|
|
+# - Gender: Targets specific audience (Men's/Women's/Unisex)
|
|
|
|
|
+# - Product Type: Core identifier (T-Shirt, Tee, Polo)
|
|
|
|
|
+# - Key Feature: Differentiator (Slim Fit, V-Neck, Graphic)
|
|
|
|
|
+# - Material: Search relevance (Cotton, Polyester, Blend)
|
|
|
|
|
+# - Size: Conversion factor (S/M/L/XL or Specific measurements)
|
|
|
|
|
+# - Color: Visual match (Black, White, Navy Blue)
|
|
|
|
|
+# - Pack Size: Value indicator (Pack of 3, Single)
|
|
|
|
|
+
|
|
|
|
|
+# Examples:
|
|
|
|
|
+# ✓ Good: "Nike Men's Slim Fit Cotton T-Shirt, Black, Large"
|
|
|
|
|
+# ✓ Good: "Hanes Women's V-Neck Polyester Blend T-Shirt Pack of 3, White, Medium"
|
|
|
|
|
+# ✗ Bad: "Nice T-Shirt for Men" (missing brand, features, specifics)
|
|
|
|
|
+# ✗ Bad: "SUPER COMFORTABLE AMAZING TSHIRT BLACK" (all caps, no structure)
|
|
|
|
|
+
|
|
|
|
|
+# For Food:
|
|
|
|
|
+# Recommended sequence: Brand + Product Name + Flavor/Variety + Size/Weight + Type + Pack Size
|
|
|
|
|
+# Element explanations:
|
|
|
|
|
+# - Brand: Recognition and trust (Kellogg's, Organic Valley)
|
|
|
|
|
+# - Product Name: Core identity (Corn Flakes, Whole Milk)
|
|
|
|
|
+# - Flavor/Variety: Taste appeal (Original, Chocolate, Strawberry)
|
|
|
|
|
+# - Size/Weight: Practical info (18 oz, 1 Gallon, 500g)
|
|
|
|
|
+# - Type: Dietary needs (Organic, Gluten-Free, Low-Fat)
|
|
|
|
|
+# - Pack Size: Bulk value (Box, 6-Pack, Family Size)
|
|
|
|
|
+
|
|
|
|
|
+# Examples:
|
|
|
|
|
+# ✓ Good: "Kellogg's Corn Flakes Cereal, Original Flavor, 18 oz Box"
|
|
|
|
|
+# ✓ Good: "Organic Valley Whole Milk, 1 Gallon, Grass-Fed"
|
|
|
|
|
+# ✗ Bad: "Delicious Cereal" (missing brand, specifics, size)
|
|
|
|
|
+# ✗ Bad: "Food Product 500g" (generic, no appeal)
|
|
|
|
|
+
|
|
|
|
|
+# For Chairs:
|
|
|
|
|
+# Recommended sequence: Brand + Type + Key Feature + Material + Color + Additional Features
|
|
|
|
|
+# Element explanations:
|
|
|
|
|
+# - Brand: Quality assurance (Herman Miller, IKEA)
|
|
|
|
|
+# - Type: Category search (Office Chair, Desk Chair, Gaming Chair)
|
|
|
|
|
+# - Key Feature: Differentiator (Ergonomic, High Back, Swivel)
|
|
|
|
|
+# - Material: Durability info (Mesh, Leather, Fabric)
|
|
|
|
|
+# - Color: Aesthetic match (Black, Gray, White)
|
|
|
|
|
+# - Additional Features: Conversion boost (Adjustable Arms, Lumbar Support)
|
|
|
|
|
+
|
|
|
|
|
+# Examples:
|
|
|
|
|
+# ✓ Good: "Herman Miller Aeron Ergonomic Office Chair, Mesh Fabric, Black, Adjustable Arms"
|
|
|
|
|
+# ✓ Good: "IKEA Markus Swivel Desk Chair, Leather, Gray, High Back"
|
|
|
|
|
+# ✗ Bad: "Comfortable Chair" (missing brand, type, features)
|
|
|
|
|
+# ✗ Bad: "Chair for Office Black Color" (awkward structure, no features)
|
|
|
|
|
+
|
|
|
|
|
+# CRITICAL INSTRUCTION - TITLE STRUCTURE ANALYSIS:
|
|
|
|
|
+# You MUST analyze the current product title and identify which elements are present or missing based on the category-specific structure above. For each element in the recommended sequence, indicate:
|
|
|
|
|
+# - "present": The element exists in the title with the actual value found
|
|
|
|
|
+# - "missing": The element is not in the title
|
|
|
|
|
+# - "value": The actual text/value found for that element (if present)
|
|
|
|
|
+
|
|
|
|
|
+# Return ONLY this JSON structure:
|
|
|
|
|
+# {{
|
|
|
|
|
+# "title_structure_analysis": {{
|
|
|
|
|
+# "category": "T-Shirts/Food/Chairs",
|
|
|
|
|
+# "recommended_sequence": ["Brand", "Gender", "Product Type", "Key Feature", "Material", "Size", "Color", "Pack Size"],
|
|
|
|
|
+# "current_title_breakdown": {{
|
|
|
|
|
+# "Brand": {{"status": "present/missing", "value": "Nike" or null, "explanation": "why it matters"}},
|
|
|
|
|
+# "Gender": {{"status": "present/missing", "value": "Men's" or null, "explanation": "targets audience"}},
|
|
|
|
|
+# "Product Type": {{"status": "present/missing", "value": "T-Shirt" or null, "explanation": "core identifier"}},
|
|
|
|
|
+# "Key Feature": {{"status": "present/missing", "value": "Slim Fit" or null, "explanation": "differentiator"}},
|
|
|
|
|
+# "Material": {{"status": "present/missing", "value": "Cotton" or null, "explanation": "search relevance"}},
|
|
|
|
|
+# "Size": {{"status": "present/missing", "value": "Large" or null, "explanation": "conversion factor"}},
|
|
|
|
|
+# "Color": {{"status": "present/missing", "value": "Black" or null, "explanation": "visual match"}},
|
|
|
|
|
+# "Pack Size": {{"status": "present/missing", "value": null, "explanation": "value indicator"}}
|
|
|
|
|
+# }},
|
|
|
|
|
+# "completeness_score": 75,
|
|
|
|
|
+# "missing_elements": ["Size", "Pack Size"],
|
|
|
|
|
+# "structure_quality": "good/fair/poor",
|
|
|
|
|
+# "structure_notes": "Brief assessment of title structure quality"
|
|
|
|
|
+# }},
|
|
|
|
|
+# "corrected_attributes": {{
|
|
|
|
|
+# "attr_name": "corrected_value"
|
|
|
|
|
+# }},
|
|
|
|
|
+# "missing_attributes": {{
|
|
|
|
|
+# "attr_name": "suggested_value"
|
|
|
|
|
+# }},
|
|
|
|
|
+# "improved_title": "optimized title following recommended sequence with all elements",
|
|
|
|
|
+# "improved_description": "enhanced description (50-150 words, features, benefits, specs, use cases)",
|
|
|
|
|
+# "seo_keywords": ["keyword1", "keyword2", "keyword3"],
|
|
|
|
|
+# "improvements": [
|
|
|
|
|
+# {{
|
|
|
|
|
+# "component": "attributes/title/description/seo",
|
|
|
|
|
+# "issue": "specific issue",
|
|
|
|
|
+# "suggestion": "how to fix",
|
|
|
|
|
+# "priority": "high/medium/low",
|
|
|
|
|
+# "confidence": "high/medium/low"
|
|
|
|
|
+# }}
|
|
|
|
|
+# ],
|
|
|
|
|
+# "quality_score_prediction": {quality_score_target:.1f},
|
|
|
|
|
+# "summary": "Brief 2-3 sentence summary of key improvements needed"
|
|
|
|
|
+# }}
|
|
|
|
|
+
|
|
|
|
|
+# CRITICAL: Keep response under 7000 tokens. Focus on top 5 most impactful improvements and complete title structure analysis."""
|
|
|
|
|
+
|
|
|
|
|
+# logger.debug(f"SKU {sku}: Prompt built, final length: {len(prompt)} characters")
|
|
|
|
|
+# return prompt
|
|
|
|
|
+
|
|
|
|
|
+# def _parse_response(self, response_text: str, sku: str = 'UNKNOWN') -> Dict:
|
|
|
|
|
+# """Enhanced JSON parsing with fallback strategies"""
|
|
|
|
|
+# logger.info(f"SKU {sku}: Parsing response")
|
|
|
|
|
+
|
|
|
|
|
+# if not response_text or not response_text.strip():
|
|
|
|
|
+# logger.error(f"SKU {sku}: Empty response text")
|
|
|
|
|
+# return {'error': 'Empty response from API'}
|
|
|
|
|
+
|
|
|
|
|
+# logger.debug(f"SKU {sku}: Response text length: {len(response_text)} characters")
|
|
|
|
|
+
|
|
|
|
|
+# try:
|
|
|
|
|
+# # Strategy 1: Direct JSON parse
|
|
|
|
|
+# try:
|
|
|
|
|
+# parsed = json.loads(response_text)
|
|
|
|
|
+# logger.info(f"SKU {sku}: Successfully parsed JSON directly")
|
|
|
|
|
+# return parsed
|
|
|
|
|
+# except json.JSONDecodeError as e:
|
|
|
|
|
+# logger.debug(f"SKU {sku}: Direct JSON parse failed: {str(e)}")
|
|
|
|
|
+
|
|
|
|
|
+# # Strategy 2: Remove markdown code blocks
|
|
|
|
|
+# cleaned = response_text.strip()
|
|
|
|
|
+# if '```' in cleaned:
|
|
|
|
|
+# logger.debug(f"SKU {sku}: Attempting to remove markdown code blocks")
|
|
|
|
|
+# match = re.search(r'```(?:json)?\s*(\{.*\})\s*```', cleaned, re.DOTALL)
|
|
|
|
|
+# if match:
|
|
|
|
|
+# cleaned = match.group(1)
|
|
|
|
|
+# logger.debug(f"SKU {sku}: Extracted JSON from code block")
|
|
|
|
|
+# else:
|
|
|
|
|
+# cleaned = re.sub(r'```(?:json)?', '', cleaned).strip()
|
|
|
|
|
+# logger.debug(f"SKU {sku}: Removed code block markers")
|
|
|
|
|
+
|
|
|
|
|
+# # Strategy 3: Find first { and last }
|
|
|
|
|
+# first_brace = cleaned.find('{')
|
|
|
|
|
+# last_brace = cleaned.rfind('}')
|
|
|
|
|
+
|
|
|
|
|
+# if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
|
|
|
|
|
+# cleaned = cleaned[first_brace:last_brace + 1]
|
|
|
|
|
+# logger.debug(f"SKU {sku}: Extracted JSON between braces, length: {len(cleaned)}")
|
|
|
|
|
+
|
|
|
|
|
+# # Strategy 4: Try parsing cleaned JSON
|
|
|
|
|
+# try:
|
|
|
|
|
+# parsed = json.loads(cleaned)
|
|
|
|
|
+# logger.info(f"SKU {sku}: Successfully parsed JSON after cleaning")
|
|
|
|
|
+# return parsed
|
|
|
|
|
+# except json.JSONDecodeError as e:
|
|
|
|
|
+# logger.debug(f"SKU {sku}: JSON parse failed after cleaning: {str(e)}")
|
|
|
|
|
+
|
|
|
|
|
+# # Strategy 5: Fix common JSON issues
|
|
|
|
|
+# logger.debug(f"SKU {sku}: Attempting JSON syntax fixes")
|
|
|
|
|
+# cleaned = self._fix_json_syntax(cleaned)
|
|
|
|
|
+# try:
|
|
|
|
|
+# parsed = json.loads(cleaned)
|
|
|
|
|
+# logger.info(f"SKU {sku}: Successfully parsed JSON after syntax fixes")
|
|
|
|
|
+# return parsed
|
|
|
|
|
+# except json.JSONDecodeError as e:
|
|
|
|
|
+# logger.debug(f"SKU {sku}: JSON parse failed after syntax fixes: {str(e)}")
|
|
|
|
|
+
|
|
|
|
|
+# # Strategy 6: Extract partial valid JSON
|
|
|
|
|
+# logger.debug(f"SKU {sku}: Attempting partial JSON extraction")
|
|
|
|
|
+# partial_json = self._extract_partial_json(cleaned)
|
|
|
|
|
+# if partial_json:
|
|
|
|
|
+# logger.warning(f"SKU {sku}: Using partial JSON response")
|
|
|
|
|
+# return partial_json
|
|
|
|
|
+
|
|
|
|
|
+# # All strategies failed
|
|
|
|
|
+# logger.error(f"SKU {sku}: All JSON parsing strategies failed")
|
|
|
|
|
+# logger.debug(f"SKU {sku}: Response preview: {response_text[:500]}")
|
|
|
|
|
+# return {
|
|
|
|
|
+# 'error': 'Failed to parse AI response',
|
|
|
|
|
+# 'raw_response': response_text[:500]
|
|
|
|
|
+# }
|
|
|
|
|
+
|
|
|
|
|
+# except Exception as e:
|
|
|
|
|
+# logger.error(f"SKU {sku}: Parse exception: {type(e).__name__} - {str(e)}")
|
|
|
|
|
+# logger.debug(f"SKU {sku}: Full traceback: {traceback.format_exc()}")
|
|
|
|
|
+# return {
|
|
|
|
|
+# 'error': f'Parse exception: {str(e)}',
|
|
|
|
|
+# 'raw_response': response_text[:500] if response_text else 'None'
|
|
|
|
|
+# }
|
|
|
|
|
+
|
|
|
|
|
+# def _fix_json_syntax(self, json_str: str) -> str:
|
|
|
|
|
+# """Fix common JSON syntax issues"""
|
|
|
|
|
+# try:
|
|
|
|
|
+# # Remove trailing commas before closing brackets
|
|
|
|
|
+# json_str = re.sub(r',\s*([}\]])', r'\1', json_str)
|
|
|
|
|
+
|
|
|
|
|
+# # Remove trailing content after final }
|
|
|
|
|
+# last_brace = json_str.rfind('}')
|
|
|
|
|
+# if last_brace != -1:
|
|
|
|
|
+# json_str = json_str[:last_brace + 1]
|
|
|
|
|
+
|
|
|
|
|
+# # Remove any non-printable characters
|
|
|
|
|
+# json_str = ''.join(char for char in json_str if char.isprintable() or char in '\n\r\t')
|
|
|
|
|
+
|
|
|
|
|
+# return json_str
|
|
|
|
|
+# except Exception as e:
|
|
|
|
|
+# logger.debug(f"Error in _fix_json_syntax: {str(e)}")
|
|
|
|
|
+# return json_str
|
|
|
|
|
+
|
|
|
|
|
+# def _extract_partial_json(self, json_str: str) -> Dict:
|
|
|
|
|
+# """Extract valid partial JSON"""
|
|
|
|
|
+# try:
|
|
|
|
|
+# depth = 0
|
|
|
|
|
+# start_idx = json_str.find('{')
|
|
|
|
|
+# if start_idx == -1:
|
|
|
|
|
+# return None
|
|
|
|
|
+
|
|
|
|
|
+# for i in range(start_idx, len(json_str)):
|
|
|
|
|
+# if json_str[i] == '{':
|
|
|
|
|
+# depth += 1
|
|
|
|
|
+# elif json_str[i] == '}':
|
|
|
|
|
+# depth -= 1
|
|
|
|
|
+# if depth == 0:
|
|
|
|
|
+# try:
|
|
|
|
|
+# return json.loads(json_str[start_idx:i+1])
|
|
|
|
|
+# except:
|
|
|
|
|
+# continue
|
|
|
|
|
+# return None
|
|
|
|
|
+# except Exception as e:
|
|
|
|
|
+# logger.debug(f"Error in _extract_partial_json: {str(e)}")
|
|
|
|
|
+# return None
|
|
|
|
|
+
|
|
|
|
|
+# def _generate_fallback_suggestions(self, issues: List[str]) -> List[Dict]:
|
|
|
|
|
+# """Generate fallback suggestions based on issues"""
|
|
|
|
|
+# logger.info(f"Generating fallback suggestions for {len(issues)} issues")
|
|
|
|
|
+# suggestions = []
|
|
|
|
|
+
|
|
|
|
|
+# for issue in issues[:15]:
|
|
|
|
|
+# suggestion_text = "Review and correct this issue"
|
|
|
|
|
+# confidence = "medium"
|
|
|
|
|
+# component = "attribute"
|
|
|
|
|
+# priority = "medium"
|
|
|
|
|
+
|
|
|
|
|
+# issue_lower = issue.lower()
|
|
|
|
|
+
|
|
|
|
|
+# # Determine component
|
|
|
|
|
+# if issue.startswith('Title:'):
|
|
|
|
|
+# component = "title"
|
|
|
|
|
+# elif issue.startswith('Description:'):
|
|
|
|
|
+# component = "description"
|
|
|
|
|
+# elif issue.startswith('SEO:'):
|
|
|
|
|
+# component = "seo"
|
|
|
|
|
+
|
|
|
|
|
+# # Specific suggestions
|
|
|
|
|
+# if "missing mandatory" in issue_lower:
|
|
|
|
|
+# attr = issue.split(":")[-1].strip()
|
|
|
|
|
+# suggestion_text = f"Add required {attr} - check product packaging or manufacturer details"
|
|
|
|
|
+# priority = "high"
|
|
|
|
|
+# confidence = "high"
|
|
|
|
|
+# elif "too short" in issue_lower:
|
|
|
|
|
+# if "title" in issue_lower:
|
|
|
|
|
+# suggestion_text = "Expand title to 50-100 characters including brand, model, and key features"
|
|
|
|
|
+# component = "title"
|
|
|
|
|
+# priority = "high"
|
|
|
|
|
+# elif "description" in issue_lower:
|
|
|
|
|
+# suggestion_text = "Write comprehensive 50-150 word description with features, benefits, and specifications"
|
|
|
|
|
+# component = "description"
|
|
|
|
|
+# priority = "high"
|
|
|
|
|
+# else:
|
|
|
|
|
+# suggestion_text = "Provide more detailed information"
|
|
|
|
|
+# elif "placeholder" in issue_lower:
|
|
|
|
|
+# suggestion_text = "Replace with actual product data from manufacturer or packaging"
|
|
|
|
|
+# priority = "high"
|
|
|
|
|
+# elif "grammar" in issue_lower or "spelling" in issue_lower:
|
|
|
|
|
+# suggestion_text = "Run spell-check and grammar review, ensure professional language"
|
|
|
|
|
+# component = "description"
|
|
|
|
|
+# priority = "medium"
|
|
|
|
|
+# elif "keyword" in issue_lower or "seo" in issue_lower:
|
|
|
|
|
+# suggestion_text = "Add relevant search keywords and product attributes"
|
|
|
|
|
+# component = "seo"
|
|
|
|
|
+# priority = "medium"
|
|
|
|
|
+# elif "duplicate" in issue_lower or "repetit" in issue_lower:
|
|
|
|
|
+# suggestion_text = "Remove duplicate content, provide varied information with unique details"
|
|
|
|
|
+# component = "description"
|
|
|
|
|
+# priority = "medium"
|
|
|
|
|
+# elif "not recognized" in issue_lower or "invalid" in issue_lower:
|
|
|
|
|
+# suggestion_text = "Use standardized values from category rules"
|
|
|
|
|
+# priority = "high"
|
|
|
|
|
+# confidence = "high"
|
|
|
|
|
+
|
|
|
|
|
+# suggestions.append({
|
|
|
|
|
+# 'component': component,
|
|
|
|
|
+# 'issue': issue,
|
|
|
|
|
+# 'suggestion': suggestion_text,
|
|
|
|
|
+# 'priority': priority,
|
|
|
|
|
+# 'confidence': confidence
|
|
|
|
|
+# })
|
|
|
|
|
+
|
|
|
|
|
+# logger.info(f"Generated {len(suggestions)} fallback suggestions")
|
|
|
|
|
+# return suggestions
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+# gemini_service_enhanced.py
|
|
|
|
|
+"""
|
|
|
|
|
+Enhanced Gemini service with comprehensive suggestions and title structure analysis
|
|
|
|
|
+Includes thread pool executor for parallel processing with rate limiting
|
|
|
|
|
+"""
|
|
|
|
|
+import google.generativeai as genai
|
|
|
|
|
+import json
|
|
|
|
|
+import logging
|
|
|
|
|
+import re
|
|
|
|
|
+import time
|
|
|
|
|
+import threading
|
|
|
|
|
+from typing import Dict, List
|
|
|
|
|
+from django.conf import settings
|
|
|
|
|
+from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
|
|
|
|
|
+from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
|
|
|
+import traceback
|
|
|
|
|
+
|
|
|
|
|
+# Configure logging
|
|
|
|
|
+logger = logging.getLogger(__name__)
|
|
|
|
|
+
|
|
|
|
|
+# Global rate limiter
|
|
|
|
|
class RateLimiter:
    """Thread-safe sliding-window rate limiter for API calls.

    Keeps timestamps of the calls made in the last 60 seconds and blocks the
    calling thread just long enough to stay under ``max_calls_per_minute``.

    Fix vs. previous version: interval measurement now uses
    ``time.monotonic()`` instead of ``time.time()``, so the window is immune
    to wall-clock adjustments (NTP steps, DST, manual clock changes) that
    could otherwise stall the limiter or let it over-admit calls.
    """

    # Length of the sliding window, in seconds.
    WINDOW_SECONDS = 60

    def __init__(self, max_calls_per_minute=10):
        """
        Args:
            max_calls_per_minute: Maximum number of calls permitted within
                any rolling 60-second window.
        """
        self.max_calls = max_calls_per_minute
        # Monotonic-clock timestamps of calls made within the current window.
        self.calls = []
        # Guards self.calls so the limiter is safe across worker threads.
        self.lock = threading.Lock()

    def wait_if_needed(self):
        """Block until a call is permitted, then record the call.

        If the window is full, sleeps until the oldest recorded call ages
        out (plus a 1-second safety margin), then prunes and proceeds.
        """
        with self.lock:
            now = time.monotonic()
            # Drop timestamps that have aged out of the window.
            self.calls = [t for t in self.calls if now - t < self.WINDOW_SECONDS]

            if len(self.calls) >= self.max_calls:
                # Sleep until the oldest call leaves the window; +1s margin
                # absorbs scheduling jitter on wake-up.
                oldest_call = min(self.calls)
                wait_time = self.WINDOW_SECONDS - (now - oldest_call) + 1
                if wait_time > 0:
                    logging.getLogger(__name__).info(
                        "Rate limit reached. Waiting %.2f seconds...", wait_time
                    )
                    time.sleep(wait_time)
                    # Re-prune after sleeping: at least the oldest entry has
                    # now expired, freeing a slot for this call.
                    now = time.monotonic()
                    self.calls = [t for t in self.calls if now - t < self.WINDOW_SECONDS]

            # Record this call.
            self.calls.append(time.monotonic())
            logging.getLogger(__name__).debug(
                "Rate limiter: %d calls in last 60 seconds", len(self.calls)
            )
|
|
|
|
|
+
|
|
|
class GeminiAttributeService:
|
|
class GeminiAttributeService:
|
|
|
"""Enhanced service with comprehensive AI suggestions and title structure analysis"""
|
|
"""Enhanced service with comprehensive AI suggestions and title structure analysis"""
|
|
|
|
|
|
|
|
- def __init__(self):
|
|
|
|
|
|
|
+ def __init__(self, max_workers=3, max_calls_per_minute=10):
|
|
|
api_key = getattr(settings, 'GEMINI_API_KEY', None)
|
|
api_key = getattr(settings, 'GEMINI_API_KEY', None)
|
|
|
if not api_key:
|
|
if not api_key:
|
|
|
logger.error("GEMINI_API_KEY not found in settings")
|
|
logger.error("GEMINI_API_KEY not found in settings")
|
|
@@ -1894,15 +2485,20 @@ class GeminiAttributeService:
|
|
|
|
|
|
|
|
genai.configure(api_key=api_key)
|
|
genai.configure(api_key=api_key)
|
|
|
self.model = genai.GenerativeModel('gemini-2.5-flash')
|
|
self.model = genai.GenerativeModel('gemini-2.5-flash')
|
|
|
- logger.info("GeminiAttributeService initialized successfully")
|
|
|
|
|
|
|
+ self.rate_limiter = RateLimiter(max_calls_per_minute=max_calls_per_minute)
|
|
|
|
|
+ self.max_workers = max_workers
|
|
|
|
|
+ logger.info(f"GeminiAttributeService initialized with {max_workers} workers, {max_calls_per_minute} calls/min")
|
|
|
|
|
|
|
|
@retry(
|
|
@retry(
|
|
|
stop=stop_after_attempt(3),
|
|
stop=stop_after_attempt(3),
|
|
|
- wait=wait_exponential(multiplier=1, min=2, max=10),
|
|
|
|
|
|
|
+ wait=wait_exponential(multiplier=2, min=4, max=30),
|
|
|
retry=retry_if_exception_type((Exception,))
|
|
retry=retry_if_exception_type((Exception,))
|
|
|
)
|
|
)
|
|
|
def _call_gemini_api(self, prompt, max_tokens=8192, attempt=1):
|
|
def _call_gemini_api(self, prompt, max_tokens=8192, attempt=1):
|
|
|
- """Helper method to call Gemini API with retry logic"""
|
|
|
|
|
|
|
+ """Helper method to call Gemini API with retry logic and rate limiting"""
|
|
|
|
|
+ # Wait if rate limit would be exceeded
|
|
|
|
|
+ self.rate_limiter.wait_if_needed()
|
|
|
|
|
+
|
|
|
logger.info(f"Calling Gemini API (attempt {attempt}, max_tokens={max_tokens})")
|
|
logger.info(f"Calling Gemini API (attempt {attempt}, max_tokens={max_tokens})")
|
|
|
logger.debug(f"Prompt length: {len(prompt)} characters")
|
|
logger.debug(f"Prompt length: {len(prompt)} characters")
|
|
|
|
|
|
|
@@ -1950,8 +2546,75 @@ class GeminiAttributeService:
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
logger.error(f"Gemini API call failed (attempt {attempt}): {type(e).__name__} - {str(e)}")
|
|
logger.error(f"Gemini API call failed (attempt {attempt}): {type(e).__name__} - {str(e)}")
|
|
|
logger.debug(f"Full exception traceback: {traceback.format_exc()}")
|
|
logger.debug(f"Full exception traceback: {traceback.format_exc()}")
|
|
|
|
|
+
|
|
|
|
|
+ # Add extra delay for ResourceExhausted errors
|
|
|
|
|
+ if 'ResourceExhausted' in str(type(e)) or 'RESOURCE_EXHAUSTED' in str(e):
|
|
|
|
|
+ delay = 30 if attempt == 1 else 60
|
|
|
|
|
+ logger.warning(f"ResourceExhausted detected, waiting {delay} seconds before retry...")
|
|
|
|
|
+ time.sleep(delay)
|
|
|
|
|
+
|
|
|
raise
|
|
raise
|
|
|
|
|
|
|
|
|
|
+ def generate_comprehensive_suggestions_batch(
|
|
|
|
|
+ self,
|
|
|
|
|
+ products: List[Dict],
|
|
|
|
|
+ issues_list: List[List[str]],
|
|
|
|
|
+ category_rules_list: List[List[Dict]],
|
|
|
|
|
+ scores_list: List[Dict]
|
|
|
|
|
+ ) -> List[Dict]:
|
|
|
|
|
+ """
|
|
|
|
|
+ Generate comprehensive AI suggestions for multiple products in parallel
|
|
|
|
|
+
|
|
|
|
|
+ Args:
|
|
|
|
|
+ products: List of product dictionaries
|
|
|
|
|
+ issues_list: List of issues for each product
|
|
|
|
|
+ category_rules_list: List of category rules for each product
|
|
|
|
|
+ scores_list: List of scores for each product
|
|
|
|
|
+
|
|
|
|
|
+ Returns:
|
|
|
|
|
+ List of suggestion dictionaries in the same order as input
|
|
|
|
|
+ """
|
|
|
|
|
+ total_products = len(products)
|
|
|
|
|
+ logger.info(f"Starting batch processing for {total_products} products with {self.max_workers} workers")
|
|
|
|
|
+
|
|
|
|
|
+ results = [None] * total_products # Preserve order
|
|
|
|
|
+
|
|
|
|
|
+ with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
|
|
|
|
|
+ # Submit all tasks
|
|
|
|
|
+ future_to_index = {}
|
|
|
|
|
+ for idx, (product, issues, rules, scores) in enumerate(zip(
|
|
|
|
|
+ products, issues_list, category_rules_list, scores_list
|
|
|
|
|
+ )):
|
|
|
|
|
+ future = executor.submit(
|
|
|
|
|
+ self.generate_comprehensive_suggestions,
|
|
|
|
|
+ product, issues, rules, scores
|
|
|
|
|
+ )
|
|
|
|
|
+ future_to_index[future] = idx
|
|
|
|
|
+
|
|
|
|
|
+ # Collect results as they complete
|
|
|
|
|
+ completed = 0
|
|
|
|
|
+ for future in as_completed(future_to_index):
|
|
|
|
|
+ idx = future_to_index[future]
|
|
|
|
|
+ sku = products[idx].get('sku', 'UNKNOWN')
|
|
|
|
|
+
|
|
|
|
|
+ try:
|
|
|
|
|
+ result = future.result()
|
|
|
|
|
+ results[idx] = result
|
|
|
|
|
+ completed += 1
|
|
|
|
|
+ logger.info(f"Completed {completed}/{total_products}: SKU {sku}")
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ logger.error(f"Failed to process SKU {sku}: {type(e).__name__} - {str(e)}")
|
|
|
|
|
+ results[idx] = {
|
|
|
|
|
+ 'error': f'{type(e).__name__}: {str(e)}',
|
|
|
|
|
+ 'fallback_suggestions': self._generate_fallback_suggestions(
|
|
|
|
|
+ issues_list[idx][:15] if idx < len(issues_list) else []
|
|
|
|
|
+ )
|
|
|
|
|
+ }
|
|
|
|
|
+ completed += 1
|
|
|
|
|
+
|
|
|
|
|
+ logger.info(f"Batch processing complete: {completed}/{total_products} products processed")
|
|
|
|
|
+ return results
|
|
|
|
|
+
|
|
|
def generate_comprehensive_suggestions(
|
|
def generate_comprehensive_suggestions(
|
|
|
self,
|
|
self,
|
|
|
product: Dict,
|
|
product: Dict,
|
|
@@ -2052,7 +2715,6 @@ class GeminiAttributeService:
|
|
|
'error': f'{type(e).__name__}: {str(e)}',
|
|
'error': f'{type(e).__name__}: {str(e)}',
|
|
|
'fallback_suggestions': self._generate_fallback_suggestions(issues[:15])
|
|
'fallback_suggestions': self._generate_fallback_suggestions(issues[:15])
|
|
|
}
|
|
}
|
|
|
- time.sleep(6)
|
|
|
|
|
return result
|
|
return result
|
|
|
|
|
|
|
|
def _build_comprehensive_prompt(
|
|
def _build_comprehensive_prompt(
|
|
@@ -2414,4 +3076,7 @@ CRITICAL: Keep response under 7000 tokens. Focus on top 5 most impactful improve
|
|
|
})
|
|
})
|
|
|
|
|
|
|
|
logger.info(f"Generated {len(suggestions)} fallback suggestions")
|
|
logger.info(f"Generated {len(suggestions)} fallback suggestions")
|
|
|
- return suggestions
|
|
|
|
|
|
|
+ return suggestions
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|