gemini_service.py 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810
  1. # #gemini_service.py
  2. # import google.generativeai as genai
  3. # import json
  4. # import logging
  5. # import re
  6. # from typing import Dict, List
  7. # from django.conf import settings
  8. # from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
  9. # logger = logging.getLogger(__name__)
  10. # class GeminiAttributeService:
  11. # """Service to interact with Google Gemini API for attribute and SEO suggestions"""
  12. # def __init__(self):
  13. # # Configure Gemini API
  14. # api_key = getattr(settings, 'GEMINI_API_KEY', None)
  15. # if not api_key:
  16. # raise ValueError("GEMINI_API_KEY not found in settings")
  17. # genai.configure(api_key=api_key)
  18. # self.model = genai.GenerativeModel('gemini-2.0-flash-exp') # Use latest model
  19. # @retry(
  20. # stop=stop_after_attempt(3),
  21. # wait=wait_exponential(multiplier=1, min=2, max=10),
  22. # retry=retry_if_exception_type(Exception),
  23. # before_sleep=lambda retry_state: logger.info(f"Retrying Gemini API call, attempt {retry_state.attempt_number}")
  24. # )
  25. # def _call_gemini_api(self, prompt, max_tokens=8192):
  26. # """Helper method to call Gemini API with retry logic"""
  27. # return self.model.generate_content(
  28. # prompt,
  29. # generation_config=genai.types.GenerationConfig(
  30. # temperature=0.2, # Lower for more consistent JSON
  31. # top_p=0.9,
  32. # top_k=40,
  33. # max_output_tokens=max_tokens, # Increased default
  34. # response_mime_type="application/json" # Force JSON output
  35. # ),
  36. # safety_settings={
  37. # genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
  38. # genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
  39. # genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
  40. # genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE
  41. # }
  42. # )
  43. # def generate_attribute_suggestions(
  44. # self,
  45. # product: Dict,
  46. # issues: List[str],
  47. # category_rules: List[Dict]
  48. # ) -> Dict:
  49. # """
  50. # Use Gemini to generate intelligent suggestions for fixing attribute issues
  51. # Includes SEO-aware recommendations with robust error handling
  52. # """
  53. # try:
  54. # # Limit issues to prevent prompt overflow
  55. # limited_issues = issues[:15] if len(issues) > 15 else issues
  56. # prompt = self._build_prompt(product, limited_issues, category_rules)
  57. # response = self._call_gemini_api(prompt, max_tokens=8192)
  58. # # Check if response exists
  59. # if not response or not response.candidates:
  60. # logger.error(f"No candidates returned for SKU: {product.get('sku')}")
  61. # return {
  62. # 'error': 'No candidates returned by Gemini API',
  63. # 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
  64. # }
  65. # candidate = response.candidates[0]
  66. # finish_reason_name = candidate.finish_reason.name
  67. # # Handle different finish reasons
  68. # if finish_reason_name == "MAX_TOKENS":
  69. # logger.warning(f"Max tokens reached for SKU: {product.get('sku')}, attempting partial parse")
  70. # # Try to parse partial response
  71. # try:
  72. # partial_result = self._parse_response(response.text)
  73. # if partial_result and 'error' not in partial_result:
  74. # return partial_result
  75. # except:
  76. # pass
  77. # # Retry with fewer issues
  78. # if len(issues) > 5:
  79. # logger.info("Retrying with fewer issues")
  80. # return self.generate_attribute_suggestions(product, issues[:5], category_rules)
  81. # else:
  82. # return {
  83. # 'error': 'Response too long, using fallback',
  84. # 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
  85. # }
  86. # elif finish_reason_name in ("SAFETY", "RECITATION", "OTHER"):
  87. # logger.error(f"Response blocked by {finish_reason_name} for SKU: {product.get('sku')}")
  88. # return {
  89. # 'error': f'Response blocked by {finish_reason_name} filters',
  90. # 'safety_ratings': [
  91. # {'category': str(r.category), 'probability': str(r.probability)}
  92. # for r in candidate.safety_ratings
  93. # ],
  94. # 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
  95. # }
  96. # elif finish_reason_name != "STOP":
  97. # logger.warning(f"Unexpected finish reason: {finish_reason_name}")
  98. # return {
  99. # 'error': f'Unexpected finish reason: {finish_reason_name}',
  100. # 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
  101. # }
  102. # # Parse successful response
  103. # logger.info(f"Successfully received response for SKU: {product.get('sku')}")
  104. # suggestions = self._parse_response(response.text)
  105. # if 'error' in suggestions:
  106. # logger.warning(f"Parse error for SKU: {product.get('sku')}, using fallback")
  107. # suggestions['fallback_suggestions'] = self._generate_fallback_suggestions(limited_issues)
  108. # return suggestions
  109. # except Exception as e:
  110. # logger.error(f"Gemini API error for SKU {product.get('sku')}: {str(e)}", exc_info=True)
  111. # return {
  112. # 'error': str(e),
  113. # 'fallback_suggestions': self._generate_fallback_suggestions(issues[:10])
  114. # }
  115. # def _build_prompt(self, product: Dict, issues: List[str], rules: List[Dict]) -> str:
  116. # """Build a concise, structured prompt for Gemini with SEO awareness"""
  117. # mandatory_attrs = [r['attribute_name'] for r in rules if r.get('is_mandatory')]
  118. # valid_values_map = {
  119. # r['attribute_name']: r.get('valid_values', [])[:5] # Limit to 5 values
  120. # for r in rules if r.get('valid_values')
  121. # }
  122. # # Sanitize and categorize issues
  123. # cleaned_issues = [
  124. # issue.replace("suspiciously short", "short value")
  125. # .replace("not recognized", "invalid")
  126. # .replace("likely means", "should be")
  127. # .replace("not clearly mentioned", "missing")
  128. # for issue in issues
  129. # ]
  130. # seo_issues = [i for i in cleaned_issues if i.startswith("SEO:")][:5]
  131. # attribute_issues = [i for i in cleaned_issues if not i.startswith("SEO:")][:8]
  132. # # Shortened prompt
  133. # prompt = f"""Analyze this e-commerce product and provide JSON suggestions.
  134. # PRODUCT:
  135. # SKU: {product.get('sku')}
  136. # Category: {product.get('category')}
  137. # Title: {product.get('title', '')[:200]}
  138. # Description: {product.get('description', '')[:300]}
  139. # Attributes: {json.dumps(product.get('attributes', {}), ensure_ascii=False)}
  140. # RULES:
  141. # Mandatory: {', '.join(mandatory_attrs)}
  142. # Valid Values: {json.dumps(valid_values_map, ensure_ascii=False)}
  143. # ISSUES ({len(attribute_issues)} attribute, {len(seo_issues)} SEO):
  144. # {chr(10).join(f"• {i}" for i in attribute_issues[:8])}
  145. # {chr(10).join(f"• {i}" for i in seo_issues[:5])}
  146. # Return ONLY this JSON structure (no markdown, no explanation):
  147. # {{
  148. # "corrected_attributes": {{"attr": "value"}},
  149. # "missing_attributes": {{"attr": "value"}},
  150. # "seo_optimizations": {{
  151. # "optimized_title": "50-100 char title",
  152. # "optimized_description": "50-150 word description",
  153. # "recommended_keywords": ["kw1", "kw2", "kw3"]
  154. # }},
  155. # "improvements": [
  156. # {{"issue": "...", "suggestion": "...", "confidence": "high/medium/low", "type": "attribute/seo"}}
  157. # ],
  158. # "quality_score_prediction": 85,
  159. # "reasoning": "Brief explanation"
  160. # }}
  161. # IMPORTANT: Keep response under 6000 tokens. Prioritize top 3 most critical improvements."""
  162. # return prompt
  163. # def _parse_response(self, response_text: str) -> Dict:
  164. # """Enhanced JSON parsing with multiple fallback strategies"""
  165. # if not response_text or not response_text.strip():
  166. # return {'error': 'Empty response from API'}
  167. # try:
  168. # # Strategy 1: Direct JSON parse (works with response_mime_type="application/json")
  169. # try:
  170. # parsed = json.loads(response_text)
  171. # logger.info("Successfully parsed JSON directly")
  172. # return parsed
  173. # except json.JSONDecodeError:
  174. # pass
  175. # # Strategy 2: Remove markdown code blocks
  176. # cleaned = response_text.strip()
  177. # if '```' in cleaned:
  178. # # Extract content between code blocks
  179. # match = re.search(r'```(?:json)?\s*(\{.*\})\s*```', cleaned, re.DOTALL)
  180. # if match:
  181. # cleaned = match.group(1)
  182. # else:
  183. # # Remove all code block markers
  184. # cleaned = re.sub(r'```(?:json)?', '', cleaned).strip()
  185. # # Strategy 3: Find first { and last }
  186. # first_brace = cleaned.find('{')
  187. # last_brace = cleaned.rfind('}')
  188. # if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
  189. # cleaned = cleaned[first_brace:last_brace + 1]
  190. # # Strategy 4: Try parsing cleaned JSON
  191. # try:
  192. # parsed = json.loads(cleaned)
  193. # logger.info("Successfully parsed JSON after cleaning")
  194. # return parsed
  195. # except json.JSONDecodeError as e:
  196. # logger.warning(f"JSON parse error at position {e.pos}: {e.msg}")
  197. # # Strategy 5: Attempt to fix common JSON issues
  198. # cleaned = self._fix_json_syntax(cleaned)
  199. # try:
  200. # parsed = json.loads(cleaned)
  201. # logger.info("Successfully parsed JSON after syntax fixes")
  202. # return parsed
  203. # except json.JSONDecodeError:
  204. # pass
  205. # # Strategy 6: Extract partial valid JSON
  206. # partial_json = self._extract_partial_json(cleaned)
  207. # if partial_json:
  208. # logger.warning("Using partial JSON response")
  209. # return partial_json
  210. # # All strategies failed
  211. # logger.error(f"All JSON parsing strategies failed. Response length: {len(response_text)}")
  212. # logger.error(f"Response preview: {response_text[:500]}...")
  213. # return {
  214. # 'error': 'Failed to parse AI response',
  215. # 'raw_response': response_text[:1000], # Limit size
  216. # 'parse_attempts': 6
  217. # }
  218. # except Exception as e:
  219. # logger.error(f"Unexpected error in _parse_response: {e}", exc_info=True)
  220. # return {
  221. # 'error': f'Parse exception: {str(e)}',
  222. # 'raw_response': response_text[:500] if response_text else 'None'
  223. # }
  224. # def _fix_json_syntax(self, json_str: str) -> str:
  225. # """Attempt to fix common JSON syntax issues"""
  226. # try:
  227. # # Remove trailing commas before closing braces/brackets
  228. # json_str = re.sub(r',\s*([}\]])', r'\1', json_str)
  229. # # Fix unescaped quotes in strings (simple heuristic)
  230. # # This is risky but can help in some cases
  231. # json_str = re.sub(r'(?<!\\)"(?=[^,:}\]]*[,:}\]])', '\\"', json_str)
  232. # # Remove any trailing content after final }
  233. # last_brace = json_str.rfind('}')
  234. # if last_brace != -1:
  235. # json_str = json_str[:last_brace + 1]
  236. # return json_str
  237. # except:
  238. # return json_str
  239. # def _extract_partial_json(self, json_str: str) -> Dict:
  240. # """Extract valid partial JSON by finding complete objects"""
  241. # try:
  242. # # Try to find complete nested structures
  243. # depth = 0
  244. # start_idx = json_str.find('{')
  245. # if start_idx == -1:
  246. # return None
  247. # for i in range(start_idx, len(json_str)):
  248. # if json_str[i] == '{':
  249. # depth += 1
  250. # elif json_str[i] == '}':
  251. # depth -= 1
  252. # if depth == 0:
  253. # # Found complete JSON object
  254. # try:
  255. # return json.loads(json_str[start_idx:i+1])
  256. # except:
  257. # continue
  258. # return None
  259. # except:
  260. # return None
  261. # def _generate_fallback_suggestions(self, issues: List[str]) -> List[Dict]:
  262. # """Generate enhanced fallback suggestions based on issues"""
  263. # suggestions = []
  264. # # Group similar issues
  265. # issue_categories = {
  266. # 'missing': [],
  267. # 'invalid': [],
  268. # 'seo': [],
  269. # 'other': []
  270. # }
  271. # for issue in issues:
  272. # if 'missing' in issue.lower() or 'mandatory' in issue.lower():
  273. # issue_categories['missing'].append(issue)
  274. # elif 'invalid' in issue.lower() or 'not in valid' in issue.lower():
  275. # issue_categories['invalid'].append(issue)
  276. # elif issue.startswith('SEO:'):
  277. # issue_categories['seo'].append(issue)
  278. # else:
  279. # issue_categories['other'].append(issue)
  280. # # Generate consolidated suggestions
  281. # for category, category_issues in issue_categories.items():
  282. # if not category_issues:
  283. # continue
  284. # for issue in category_issues[:5]: # Limit to 5 per category
  285. # suggestion = "Review and correct this issue"
  286. # confidence = "medium"
  287. # issue_type = "seo" if category == 'seo' else "attribute"
  288. # # Specific suggestions
  289. # if "Missing mandatory field" in issue:
  290. # attr = issue.split(":")[-1].strip()
  291. # suggestion = f"Add {attr} - check product details or title/description"
  292. # confidence = "high"
  293. # elif "not in valid values" in issue or "invalid" in issue.lower():
  294. # suggestion = "Use one of the valid values from category rules"
  295. # confidence = "high"
  296. # elif "placeholder" in issue.lower():
  297. # suggestion = "Replace with actual product data"
  298. # confidence = "high"
  299. # elif "too short" in issue.lower():
  300. # if "title" in issue.lower():
  301. # suggestion = "Expand to 50-100 characters with key attributes"
  302. # confidence = "high"
  303. # issue_type = "seo"
  304. # elif "description" in issue.lower():
  305. # suggestion = "Expand to 50-150 words with details"
  306. # confidence = "high"
  307. # issue_type = "seo"
  308. # else:
  309. # suggestion = "Provide more detailed information"
  310. # confidence = "medium"
  311. # elif "keyword" in issue.lower() or "search term" in issue.lower():
  312. # suggestion = "Add relevant keywords to improve discoverability"
  313. # confidence = "medium"
  314. # issue_type = "seo"
  315. # suggestions.append({
  316. # 'issue': issue,
  317. # 'suggestion': suggestion,
  318. # 'confidence': confidence,
  319. # 'type': issue_type,
  320. # 'category': category
  321. # })
  322. # return suggestions[:15] # Return top 15 suggestions
  323. # def extract_attributes_with_ai(self, title: str, description: str, category: str) -> Dict:
  324. # """
  325. # Use Gemini to extract attributes from unstructured text
  326. # """
  327. # try:
  328. # prompt = f"""Extract product attributes from this text. Return ONLY valid JSON.
  329. # Category: {category}
  330. # Title: {title[:200]}
  331. # Description: {description[:400]}
  332. # Return format:
  333. # {{
  334. # "brand": "value or null",
  335. # "color": "value or null",
  336. # "size": "value or null",
  337. # "material": "value or null",
  338. # "model": "value or null"
  339. # }}"""
  340. # response = self._call_gemini_api(prompt, max_tokens=1024)
  341. # if not response or not response.candidates:
  342. # return {'error': 'No response'}
  343. # return self._parse_response(response.text)
  344. # except Exception as e:
  345. # logger.error(f"AI extraction error: {str(e)}")
  346. # return {'error': str(e)}
  347. # gemini_service_enhanced.py
  348. """
  349. Enhanced Gemini service with comprehensive suggestions for all components
  350. """
  351. import google.generativeai as genai
  352. import json
  353. import logging
  354. import re
  355. from typing import Dict, List
  356. from django.conf import settings
  357. from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
  358. logger = logging.getLogger(__name__)
  359. class GeminiAttributeService:
  360. """Enhanced service with comprehensive AI suggestions"""
  361. def __init__(self):
  362. api_key = getattr(settings, 'GEMINI_API_KEY', None)
  363. if not api_key:
  364. raise ValueError("GEMINI_API_KEY not found in settings")
  365. genai.configure(api_key=api_key)
  366. self.model = genai.GenerativeModel('gemini-2.5-flash')
  367. @retry(
  368. stop=stop_after_attempt(3),
  369. wait=wait_exponential(multiplier=1, min=2, max=10),
  370. retry=retry_if_exception_type(Exception)
  371. )
  372. def _call_gemini_api(self, prompt, max_tokens=8192):
  373. """Helper method to call Gemini API with retry logic"""
  374. try:
  375. return self.model.generate_content(
  376. prompt,
  377. generation_config=genai.types.GenerationConfig(
  378. temperature=0.2,
  379. top_p=0.9,
  380. top_k=40,
  381. max_output_tokens=max_tokens,
  382. response_mime_type="application/json"
  383. ),
  384. safety_settings={
  385. genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
  386. genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
  387. genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
  388. genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE
  389. }
  390. )
  391. except genai.types.GenerationError as e:
  392. # Handle specific generation errors
  393. print("Generation error:", str(e))
  394. return {"error": "Content generation failed", "details": str(e)}
  395. except Exception as e:
  396. # Catch-all for any other unexpected errors
  397. print("Unexpected error:", str(e))
  398. return {"error": "Unexpected error occurred", "details": str(e)}
  399. def generate_comprehensive_suggestions(
  400. self,
  401. product: Dict,
  402. issues: List[str],
  403. category_rules: List[Dict],
  404. scores: Dict
  405. ) -> Dict:
  406. """
  407. Generate comprehensive AI suggestions covering ALL quality aspects
  408. """
  409. try:
  410. limited_issues = issues[:20] if len(issues) > 20 else issues
  411. prompt = self._build_comprehensive_prompt(product, limited_issues, category_rules, scores)
  412. response = self._call_gemini_api(prompt, max_tokens=8192)
  413. print("response",response)
  414. if not response or not response.candidates:
  415. logger.error(f"No candidates returned for SKU: {product.get('sku')}")
  416. return {
  417. 'error': 'No response from AI',
  418. 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
  419. }
  420. candidate = response.candidates[0]
  421. finish_reason = candidate.finish_reason.name
  422. if finish_reason != "STOP":
  423. logger.warning(f"Non-STOP finish reason: {finish_reason}")
  424. if finish_reason == "MAX_TOKENS" and len(issues) > 10:
  425. return self.generate_comprehensive_suggestions(product, issues[:10], category_rules, scores)
  426. return {
  427. 'error': f'Response blocked: {finish_reason}',
  428. 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
  429. }
  430. logger.info(f"Successfully received comprehensive suggestions for SKU: {product.get('sku')}")
  431. suggestions = self._parse_response(response.text)
  432. if 'error' in suggestions:
  433. suggestions['fallback_suggestions'] = self._generate_fallback_suggestions(limited_issues)
  434. return suggestions
  435. except Exception as e:
  436. logger.error(f"Gemini API error: {str(e)}", exc_info=True)
  437. return {
  438. 'error': str(e),
  439. 'fallback_suggestions': self._generate_fallback_suggestions(issues[:15])
  440. }
  441. def _build_comprehensive_prompt(
  442. self,
  443. product: Dict,
  444. issues: List[str],
  445. rules: List[Dict],
  446. scores: Dict
  447. ) -> str:
  448. """Build comprehensive prompt for all quality aspects"""
  449. mandatory_attrs = [r['attribute_name'] for r in rules if r.get('is_mandatory')]
  450. valid_values_map = {
  451. r['attribute_name']: r.get('valid_values', [])[:5]
  452. for r in rules if r.get('valid_values')
  453. }
  454. # Categorize issues
  455. attribute_issues = [i for i in issues if not any(prefix in i for prefix in ['Title:', 'Description:', 'SEO:'])]
  456. title_issues = [i for i in issues if i.startswith('Title:')]
  457. desc_issues = [i for i in issues if i.startswith('Description:')]
  458. seo_issues = [i for i in issues if i.startswith('SEO:')]
  459. prompt = f"""Analyze this e-commerce product and provide comprehensive quality improvements.
  460. PRODUCT DATA:
  461. SKU: {product.get('sku')}
  462. Category: {product.get('category')}
  463. Title: {product.get('title', '')[:250]}
  464. Description: {product.get('description', '')[:400]}
  465. Attributes: {json.dumps(product.get('attributes', {}), ensure_ascii=False)}
  466. QUALITY SCORES (out of 100):
  467. - Mandatory Fields: {scores.get('mandatory_fields', 0):.1f}
  468. - Standardization: {scores.get('standardization', 0):.1f}
  469. - Missing Values: {scores.get('missing_values', 0):.1f}
  470. - Consistency: {scores.get('consistency', 0):.1f}
  471. - SEO: {scores.get('seo_discoverability', 0):.1f}
  472. - Title Quality: {scores.get('title_quality', 0):.1f}
  473. - Description Quality: {scores.get('description_quality', 0):.1f}
  474. CATEGORY RULES:
  475. Mandatory Attributes: {', '.join(mandatory_attrs)}
  476. Valid Values: {json.dumps(valid_values_map, ensure_ascii=False)}
  477. ISSUES FOUND:
  478. Attributes ({len(attribute_issues)}):
  479. {chr(10).join(f" • {i}" for i in attribute_issues[:8])}
  480. Title ({len(title_issues)}):
  481. {chr(10).join(f" • {i}" for i in title_issues[:5])}
  482. Description ({len(desc_issues)}):
  483. {chr(10).join(f" • {i}" for i in desc_issues[:5])}
  484. SEO ({len(seo_issues)}):
  485. {chr(10).join(f" • {i}" for i in seo_issues[:5])}
  486. Return ONLY this JSON structure:
  487. {{
  488. "corrected_attributes": {{
  489. "attr_name": "corrected_value"
  490. }},
  491. "missing_attributes": {{
  492. "attr_name": "suggested_value"
  493. }},
  494. "improved_title": "optimized title (50-100 chars, includes brand, model, key features)",
  495. "improved_description": "enhanced description (50-150 words, features, benefits, specs, use cases)",
  496. "seo_keywords": ["keyword1", "keyword2", "keyword3"],
  497. "improvements": [
  498. {{
  499. "component": "attributes/title/description/seo",
  500. "issue": "specific issue",
  501. "suggestion": "how to fix",
  502. "priority": "high/medium/low",
  503. "confidence": "high/medium/low"
  504. }}
  505. ],
  506. "quality_score_prediction": 93.25,
  507. "summary": "Brief 2-3 sentence summary of key improvements needed"
  508. }}
  509. CRITICAL: Keep response under 7000 tokens. Focus on top 5 most impactful improvements."""
  510. return prompt
  511. def _parse_response(self, response_text: str) -> Dict:
  512. """Enhanced JSON parsing with fallback strategies"""
  513. if not response_text or not response_text.strip():
  514. return {'error': 'Empty response from API'}
  515. try:
  516. # Direct JSON parse
  517. try:
  518. parsed = json.loads(response_text)
  519. logger.info("Successfully parsed JSON directly")
  520. return parsed
  521. except json.JSONDecodeError:
  522. pass
  523. # Remove markdown code blocks
  524. cleaned = response_text.strip()
  525. if '```' in cleaned:
  526. match = re.search(r'```(?:json)?\s*(\{.*\})\s*```', cleaned, re.DOTALL)
  527. if match:
  528. cleaned = match.group(1)
  529. else:
  530. cleaned = re.sub(r'```(?:json)?', '', cleaned).strip()
  531. # Find first { and last }
  532. first_brace = cleaned.find('{')
  533. last_brace = cleaned.rfind('}')
  534. if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
  535. cleaned = cleaned[first_brace:last_brace + 1]
  536. # Try parsing cleaned JSON
  537. try:
  538. parsed = json.loads(cleaned)
  539. logger.info("Successfully parsed JSON after cleaning")
  540. return parsed
  541. except json.JSONDecodeError as e:
  542. logger.warning(f"JSON parse error: {e}")
  543. # Fix common JSON issues
  544. cleaned = self._fix_json_syntax(cleaned)
  545. try:
  546. parsed = json.loads(cleaned)
  547. logger.info("Successfully parsed JSON after syntax fixes")
  548. return parsed
  549. except json.JSONDecodeError:
  550. pass
  551. # Extract partial valid JSON
  552. partial_json = self._extract_partial_json(cleaned)
  553. if partial_json:
  554. logger.warning("Using partial JSON response")
  555. return partial_json
  556. logger.error(f"All JSON parsing failed. Response length: {len(response_text)}")
  557. return {
  558. 'error': 'Failed to parse AI response',
  559. 'raw_response': response_text[:500]
  560. }
  561. except Exception as e:
  562. logger.error(f"Parse exception: {e}", exc_info=True)
  563. return {
  564. 'error': f'Parse exception: {str(e)}',
  565. 'raw_response': response_text[:500] if response_text else 'None'
  566. }
  567. def _fix_json_syntax(self, json_str: str) -> str:
  568. """Fix common JSON syntax issues"""
  569. try:
  570. # Remove trailing commas
  571. json_str = re.sub(r',\s*([}\]])', r'\1', json_str)
  572. # Remove trailing content after final }
  573. last_brace = json_str.rfind('}')
  574. if last_brace != -1:
  575. json_str = json_str[:last_brace + 1]
  576. return json_str
  577. except:
  578. return json_str
  579. def _extract_partial_json(self, json_str: str) -> Dict:
  580. """Extract valid partial JSON"""
  581. try:
  582. depth = 0
  583. start_idx = json_str.find('{')
  584. if start_idx == -1:
  585. return None
  586. for i in range(start_idx, len(json_str)):
  587. if json_str[i] == '{':
  588. depth += 1
  589. elif json_str[i] == '}':
  590. depth -= 1
  591. if depth == 0:
  592. try:
  593. return json.loads(json_str[start_idx:i+1])
  594. except:
  595. continue
  596. return None
  597. except:
  598. return None
  599. def _generate_fallback_suggestions(self, issues: List[str]) -> List[Dict]:
  600. """Generate fallback suggestions based on issues"""
  601. suggestions = []
  602. for issue in issues[:15]:
  603. suggestion_text = "Review and correct this issue"
  604. confidence = "medium"
  605. component = "attribute"
  606. priority = "medium"
  607. issue_lower = issue.lower()
  608. # Determine component
  609. if issue.startswith('Title:'):
  610. component = "title"
  611. elif issue.startswith('Description:'):
  612. component = "description"
  613. elif issue.startswith('SEO:'):
  614. component = "seo"
  615. # Specific suggestions
  616. if "missing mandatory" in issue_lower:
  617. attr = issue.split(":")[-1].strip()
  618. suggestion_text = f"Add required {attr} - check product packaging or manufacturer details"
  619. priority = "high"
  620. confidence = "high"
  621. elif "too short" in issue_lower:
  622. if "title" in issue_lower:
  623. suggestion_text = "Expand title to 50-100 characters including brand, model, and key features"
  624. component = "title"
  625. priority = "high"
  626. elif "description" in issue_lower:
  627. suggestion_text = "Write comprehensive 50-150 word description with features, benefits, and specifications"
  628. component = "description"
  629. priority = "high"
  630. else:
  631. suggestion_text = "Provide more detailed information"
  632. elif "placeholder" in issue_lower:
  633. suggestion_text = "Replace with actual product data from manufacturer or packaging"
  634. priority = "high"
  635. elif "grammar" in issue_lower or "spelling" in issue_lower:
  636. suggestion_text = "Run spell-check and grammar review, ensure professional language"
  637. component = "description"
  638. priority = "medium"
  639. elif "keyword" in issue_lower or "seo" in issue_lower:
  640. suggestion_text = "Add relevant search keywords and product attributes"
  641. component = "seo"
  642. priority = "medium"
  643. elif "duplicate" in issue_lower or "repetit" in issue_lower:
  644. suggestion_text = "Remove duplicate content, provide varied information with unique details"
  645. component = "description"
  646. priority = "medium"
  647. elif "not recognized" in issue_lower or "invalid" in issue_lower:
  648. suggestion_text = "Use standardized values from category rules"
  649. priority = "high"
  650. confidence = "high"
  651. suggestions.append({
  652. 'component': component,
  653. 'issue': issue,
  654. 'suggestion': suggestion_text,
  655. 'priority': priority,
  656. 'confidence': confidence
  657. })
  658. return suggestions