gemini_service.py 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084
  1. # import google.generativeai as genai
  2. # import json
  3. # import logging
  4. # from typing import Dict, List
  5. # from django.conf import settings
  6. # from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
  7. # logger = logging.getLogger(__name__)
  8. # class GeminiAttributeService:
  9. # """Service to interact with Google Gemini API for attribute suggestions"""
  10. # def __init__(self):
  11. # # Configure Gemini API
  12. # api_key = getattr(settings, 'GEMINI_API_KEY', None)
  13. # if not api_key:
  14. # raise ValueError("GEMINI_API_KEY not found in settings")
  15. # genai.configure(api_key=api_key)
  16. # self.model = genai.GenerativeModel('gemini-2.5-flash')
  17. # @retry(
  18. # stop=stop_after_attempt(3),
  19. # wait=wait_exponential(multiplier=1, min=2, max=10),
  20. # retry=retry_if_exception_type(Exception),
  21. # before_sleep=lambda retry_state: logger.info(f"Retrying Gemini API call, attempt {retry_state.attempt_number}")
  22. # )
  23. # def _call_gemini_api(self, prompt):
  24. # """Helper method to call Gemini API with retry logic"""
  25. # return self.model.generate_content(
  26. # prompt,
  27. # generation_config=genai.types.GenerationConfig(
  28. # temperature=0.3,
  29. # top_p=0.95,
  30. # top_k=40,
  31. # max_output_tokens=4096 # Increased to handle complex responses
  32. # ),
  33. # safety_settings={
  34. # genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
  35. # genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
  36. # genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
  37. # genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE
  38. # }
  39. # )
  40. # def generate_attribute_suggestions(
  41. # self,
  42. # product: Dict,
  43. # issues: List[str],
  44. # category_rules: List[Dict]
  45. # ) -> Dict:
  46. # """
  47. # Use Gemini to generate intelligent suggestions for fixing attribute issues
  48. # ...
  49. # """
  50. # try:
  51. # prompt = self._build_prompt(product, issues, category_rules)
  52. # response = self._call_gemini_api(prompt)
  53. # print(f"response is {response}")
  54. # if not response.candidates:
  55. # logger.error(f"No candidates returned. Response: {response}")
  56. # return {
  57. # 'error': 'No candidates returned by Gemini API',
  58. # 'fallback_suggestions': self._generate_fallback_suggestions(issues)
  59. # }
  60. # candidate = response.candidates[0]
  61. # # --- START FIX: Use candidate.finish_reason.name for robust check ---
  62. # # Check if the finish reason indicates a block (e.g., SAFETY, OTHER, RECITATION)
  63. # finish_reason_name = candidate.finish_reason.name
  64. # if finish_reason_name in ("SAFETY", "RECITATION", "OTHER"): # Add other block reasons as needed
  65. # logger.error(f"Response blocked by {finish_reason_name}. Safety ratings: {candidate.safety_ratings}")
  66. # return {
  67. # 'error': f'Response blocked by {finish_reason_name} filters',
  68. # 'safety_ratings': [
  69. # {'category': str(r.category), 'probability': str(r.probability)}
  70. # for r in candidate.safety_ratings
  71. # ],
  72. # 'fallback_suggestions': self._generate_fallback_suggestions(issues)
  73. # }
  74. # # --- END FIX ---
  75. # logger.info(f"Raw response: {response.text[:500]}...")
  76. # suggestions = self._parse_response(response.text)
  77. # logger.info(f"Parsed suggestions: {suggestions}")
  78. # return suggestions
  79. # except Exception as e:
  80. # logger.error(f"Gemini API error: {str(e)}", exc_info=True)
  81. # return {
  82. # 'error': str(e),
  83. # 'fallback_suggestions': self._generate_fallback_suggestions(issues)
  84. # }
  85. # def _build_prompt(self, product: Dict, issues: List[str], rules: List[Dict]) -> str:
  86. # """Build a structured prompt for Gemini"""
  87. # mandatory_attrs = [r['attribute_name'] for r in rules if r.get('is_mandatory')]
  88. # valid_values_map = {
  89. # r['attribute_name']: r.get('valid_values', [])
  90. # for r in rules if r.get('valid_values')
  91. # }
  92. # # Sanitize issues to avoid ambiguous phrasing
  93. # cleaned_issues = [
  94. # issue.replace("suspiciously short", "may need more detail")
  95. # .replace("not recognized", "not in valid values")
  96. # .replace("likely means", "recommended correction")
  97. # .replace("not clearly mentioned", "missing from title/description")
  98. # for issue in issues
  99. # ]
  100. # prompt = f"""You are an expert e-commerce product data analyst specializing in clothing products. All input data is safe, non-sensitive, and related to clothing product attributes. Your task is to analyze product attributes and provide specific, actionable suggestions to fix identified issues, ensuring compliance with category rules.
  101. # PRODUCT INFORMATION:
  102. # - SKU: {product.get('sku', 'N/A')}
  103. # - Category: {product.get('category', 'N/A')}
  104. # - Title: {product.get('title', 'N/A')}
  105. # - Description: {product.get('description', 'N/A')}
  106. # - Current Attributes: {json.dumps(product.get('attributes', {}), indent=2)}
  107. # CATEGORY RULES:
  108. # - Mandatory Attributes: {', '.join(mandatory_attrs) or 'None'}
  109. # - Valid Values: {json.dumps(valid_values_map, indent=2) or '{}'}
  110. # DETECTED ISSUES:
  111. # {chr(10).join(f"- {issue}" for issue in cleaned_issues) or '- None'}
  112. # TASK:
  113. # Analyze the product data and issues. Provide specific suggestions to fix each issue and extract missing attributes from the title or description. Ensure all suggestions are relevant to clothing products and formatted as valid JSON.
  114. # OUTPUT FORMAT (return valid JSON only):
  115. # {{
  116. # "corrected_attributes": {{
  117. # "attribute_name": "suggested_value"
  118. # }},
  119. # "missing_attributes": {{
  120. # "attribute_name": "extracted_value"
  121. # }},
  122. # "improvements": [
  123. # {{
  124. # "issue": "description of the issue",
  125. # "suggestion": "specific action to take",
  126. # "confidence": "high/medium/low"
  127. # }}
  128. # ],
  129. # "quality_score_prediction": integer,
  130. # "reasoning": "Brief explanation of suggested changes"
  131. # }}"""
  132. # logger.info(f"Generated prompt (length: {len(prompt)} chars, ~{len(prompt)//4} tokens): {prompt}")
  133. # return prompt
  134. # def _parse_response(self, response_text: str) -> Dict:
  135. # """Parse Gemini's response and extract JSON"""
  136. # try:
  137. # # Remove markdown code blocks and language identifier
  138. # cleaned = response_text.strip()
  139. # if cleaned.startswith('```'):
  140. # cleaned = cleaned.split('```')[1].strip()
  141. # if cleaned.startswith('json'):
  142. # cleaned = cleaned[4:].strip()
  143. # # Attempt to parse JSON
  144. # parsed = json.loads(cleaned)
  145. # return parsed
  146. # except json.JSONDecodeError as e:
  147. # logger.error(f"Failed to parse Gemini response: {e}")
  148. # logger.error(f"Response was: {response_text[:1000]}...")
  149. # # Attempt to fix partial JSON
  150. # try:
  151. # # Truncate at last valid closing brace
  152. # last_valid = cleaned.rfind('}')
  153. # if last_valid != -1:
  154. # partial_json = cleaned[:last_valid + 1]
  155. # parsed = json.loads(partial_json)
  156. # logger.warning("Parsed partial JSON response")
  157. # return parsed
  158. # except json.JSONDecodeError:
  159. # logger.error("Could not parse partial JSON")
  160. # return {
  161. # 'error': 'Failed to parse AI response',
  162. # 'raw_response': response_text,
  163. # 'fallback_suggestions': []
  164. # }
  165. # def _generate_fallback_suggestions(self, issues: List[str]) -> List[Dict]:
  166. # """Generate enhanced fallback suggestions based on issues"""
  167. # suggestions = []
  168. # for issue in issues:
  169. # suggestion = "Please review and correct this issue manually"
  170. # confidence = "low"
  171. # # Specific suggestions for common issues
  172. # if "Missing mandatory field" in issue:
  173. # attr = issue.split("Missing mandatory field: ")[-1]
  174. # suggestion = f"Provide a valid value for {attr} (e.g., extract from title/description or use a common value like 'Black' for color)"
  175. # confidence = "medium"
  176. # elif "not in valid values" in issue:
  177. # attr = issue.split(":")[0].strip()
  178. # suggestion = f"Choose a valid value for {attr} (e.g., XS, S, M, L, XL for size)"
  179. # confidence = "medium"
  180. # elif "contains placeholder" in issue:
  181. # attr = issue.split("'")[1]
  182. # suggestion = f"Replace the placeholder in {attr} with a specific value (e.g., M, L, XL for size)"
  183. # confidence = "high"
  184. # elif "recommended correction" in issue:
  185. # correction = issue.split("recommended correction ")[-1].split(" ")[0].strip(")'")
  186. # attr = issue.split(":")[0].strip()
  187. # suggestion = f"Correct {attr} to '{correction}'"
  188. # confidence = "high"
  189. # elif "may need more detail" in issue:
  190. # attr = issue.split("'")[1]
  191. # suggestion = f"Provide a more detailed value for {attr} (e.g., 'Medium' instead of 'M')"
  192. # confidence = "medium"
  193. # elif "missing from title/description" in issue:
  194. # attr = issue.split("'")[1]
  195. # value = issue.split("'")[3] if len(issue.split("'")) > 3 else "unknown"
  196. # suggestion = f"Add '{value}' to the title or description for {attr} (e.g., update title to include '{value}')"
  197. # confidence = "high"
  198. # suggestions.append({
  199. # 'issue': issue,
  200. # 'suggestion': suggestion,
  201. # 'confidence': confidence
  202. # })
  203. # return suggestions
  204. # def extract_attributes_with_ai(self, title: str, description: str, category: str) -> Dict:
  205. # """
  206. # Use Gemini to extract attributes from unstructured text
  207. # """
  208. # try:
  209. # prompt = f"""You are an expert e-commerce product data analyst specializing in clothing products. All input data is safe, non-sensitive, and related to clothing product attributes. Extract product attributes from the following text.
  210. # Category: {category}
  211. # Title: {title}
  212. # Description: {description}
  213. # Extract these attributes if present:
  214. # - brand
  215. # - color
  216. # - size
  217. # - material
  218. # - model
  219. # - weight
  220. # - dimensions
  221. # - warranty
  222. # Return ONLY valid JSON in this format:
  223. # {{
  224. # "brand": "extracted brand or null",
  225. # "color": "extracted color or null",
  226. # "size": "extracted size or null",
  227. # "material": "extracted material or null",
  228. # "model": "extracted model or null",
  229. # "weight": "extracted weight or null",
  230. # "dimensions": "extracted dimensions or null",
  231. # "warranty": "extracted warranty or null"
  232. # }}"""
  233. # response = self._call_gemini_api(prompt)
  234. # logger.info(f"Raw extraction response: {response.text[:500]}...")
  235. # return self._parse_response(response.text)
  236. # except Exception as e:
  237. # logger.error(f"AI extraction error: {str(e)}")
  238. # return {
  239. # 'error': str(e),
  240. # 'fallback': {}
  241. # }
  242. # import google.generativeai as genai
  243. # import json
  244. # import logging
  245. # from typing import Dict, List
  246. # from django.conf import settings
  247. # from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
  248. # logger = logging.getLogger(__name__)
  249. # class GeminiAttributeService:
  250. # """Service to interact with Google Gemini API for attribute and SEO suggestions"""
  251. # def __init__(self):
  252. # # Configure Gemini API
  253. # api_key = getattr(settings, 'GEMINI_API_KEY', None)
  254. # if not api_key:
  255. # raise ValueError("GEMINI_API_KEY not found in settings")
  256. # genai.configure(api_key=api_key)
  257. # self.model = genai.GenerativeModel('gemini-2.5-flash')
  258. # @retry(
  259. # stop=stop_after_attempt(3),
  260. # wait=wait_exponential(multiplier=1, min=2, max=10),
  261. # retry=retry_if_exception_type(Exception),
  262. # before_sleep=lambda retry_state: logger.info(f"Retrying Gemini API call, attempt {retry_state.attempt_number}")
  263. # )
  264. # def _call_gemini_api(self, prompt):
  265. # """Helper method to call Gemini API with retry logic"""
  266. # return self.model.generate_content(
  267. # prompt,
  268. # generation_config=genai.types.GenerationConfig(
  269. # temperature=0.3,
  270. # top_p=0.95,
  271. # top_k=40,
  272. # max_output_tokens=4096
  273. # ),
  274. # safety_settings={
  275. # genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
  276. # genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
  277. # genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
  278. # genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE
  279. # }
  280. # )
  281. # def generate_attribute_suggestions(
  282. # self,
  283. # product: Dict,
  284. # issues: List[str],
  285. # category_rules: List[Dict]
  286. # ) -> Dict:
  287. # """
  288. # Use Gemini to generate intelligent suggestions for fixing attribute issues
  289. # Includes SEO-aware recommendations
  290. # """
  291. # try:
  292. # prompt = self._build_prompt(product, issues, category_rules)
  293. # response = self._call_gemini_api(prompt)
  294. # if not response.candidates:
  295. # logger.error(f"No candidates returned. Response: {response}")
  296. # return {
  297. # 'error': 'No candidates returned by Gemini API',
  298. # 'fallback_suggestions': self._generate_fallback_suggestions(issues)
  299. # }
  300. # candidate = response.candidates[0]
  301. # finish_reason_name = candidate.finish_reason.name
  302. # if finish_reason_name in ("SAFETY", "RECITATION", "OTHER"):
  303. # logger.error(f"Response blocked by {finish_reason_name}. Safety ratings: {candidate.safety_ratings}")
  304. # return {
  305. # 'error': f'Response blocked by {finish_reason_name} filters',
  306. # 'safety_ratings': [
  307. # {'category': str(r.category), 'probability': str(r.probability)}
  308. # for r in candidate.safety_ratings
  309. # ],
  310. # 'fallback_suggestions': self._generate_fallback_suggestions(issues)
  311. # }
  312. # logger.info(f"Raw response: {response.text[:500]}...")
  313. # suggestions = self._parse_response(response.text)
  314. # logger.info(f"Parsed suggestions: {suggestions}")
  315. # return suggestions
  316. # except Exception as e:
  317. # logger.error(f"Gemini API error: {str(e)}", exc_info=True)
  318. # return {
  319. # 'error': str(e),
  320. # 'fallback_suggestions': self._generate_fallback_suggestions(issues)
  321. # }
  322. # def _build_prompt(self, product: Dict, issues: List[str], rules: List[Dict]) -> str:
  323. # """Build a structured prompt for Gemini with SEO awareness"""
  324. # mandatory_attrs = [r['attribute_name'] for r in rules if r.get('is_mandatory')]
  325. # valid_values_map = {
  326. # r['attribute_name']: r.get('valid_values', [])
  327. # for r in rules if r.get('valid_values')
  328. # }
  329. # # Sanitize issues
  330. # cleaned_issues = [
  331. # issue.replace("suspiciously short", "may need more detail")
  332. # .replace("not recognized", "not in valid values")
  333. # .replace("likely means", "recommended correction")
  334. # .replace("not clearly mentioned", "missing from title/description")
  335. # for issue in issues
  336. # ]
  337. # # Separate SEO issues
  338. # seo_issues = [i for i in cleaned_issues if i.startswith("SEO:")]
  339. # attribute_issues = [i for i in cleaned_issues if not i.startswith("SEO:")]
  340. # prompt = f"""You are an expert e-commerce product data analyst specializing in clothing products and SEO optimization. All input data is safe, non-sensitive, and related to clothing product attributes. Your task is to analyze product attributes and provide specific, actionable suggestions to fix identified issues, ensuring compliance with category rules and SEO best practices.
  341. # PRODUCT INFORMATION:
  342. # - SKU: {product.get('sku', 'N/A')}
  343. # - Category: {product.get('category', 'N/A')}
  344. # - Title: {product.get('title', 'N/A')}
  345. # - Description: {product.get('description', 'N/A')}
  346. # - Current Attributes: {json.dumps(product.get('attributes', {}), indent=2)}
  347. # CATEGORY RULES:
  348. # - Mandatory Attributes: {', '.join(mandatory_attrs) or 'None'}
  349. # - Valid Values: {json.dumps(valid_values_map, indent=2) or '{}'}
  350. # DETECTED ATTRIBUTE ISSUES:
  351. # {chr(10).join(f"- {issue}" for issue in attribute_issues) or '- None'}
  352. # DETECTED SEO ISSUES:
  353. # {chr(10).join(f"- {issue}" for issue in seo_issues) or '- None'}
  354. # TASK:
  355. # 1. Analyze the product data and fix all attribute issues
  356. # 2. Provide SEO-optimized recommendations for title and description
  357. # 3. Suggest missing attributes that can be extracted from title/description
  358. # 4. Ensure all suggestions improve both data quality AND discoverability
  359. # SEO GUIDELINES:
  360. # - Title should be 50-100 characters with key attributes (brand, model, color, size)
  361. # - Description should be 50-150 words, descriptive, and include relevant keywords
  362. # - Include high-value search terms where appropriate (e.g., "premium", "durable", "authentic")
  363. # - Avoid keyword stuffing - keep text natural and readable
  364. # OUTPUT FORMAT (return valid JSON only):
  365. # {{
  366. # "corrected_attributes": {{
  367. # "attribute_name": "suggested_value"
  368. # }},
  369. # "missing_attributes": {{
  370. # "attribute_name": "extracted_value"
  371. # }},
  372. # "seo_optimizations": {{
  373. # "optimized_title": "SEO-friendly title suggestion (if title needs improvement)",
  374. # "optimized_description": "SEO-friendly description suggestion (if description needs improvement)",
  375. # "recommended_keywords": ["keyword1", "keyword2"],
  376. # "title_improvements": "Specific changes for title",
  377. # "description_improvements": "Specific changes for description"
  378. # }},
  379. # "improvements": [
  380. # {{
  381. # "issue": "description of the issue",
  382. # "suggestion": "specific action to take",
  383. # "confidence": "high/medium/low",
  384. # "type": "attribute/seo"
  385. # }}
  386. # ],
  387. # "quality_score_prediction": integer (0-100),
  388. # "reasoning": "Brief explanation of suggested changes and expected impact on discoverability"
  389. # }}"""
  390. # logger.info(f"Generated prompt (length: {len(prompt)} chars)")
  391. # return prompt
  392. # def generate_seo_suggestions(self, product: Dict, seo_issues: List[str]) -> Dict:
  393. # """
  394. # Generate SEO-specific suggestions using Gemini
  395. # Focused prompt for SEO optimization only
  396. # """
  397. # try:
  398. # prompt = f"""You are an SEO expert for e-commerce products. Analyze this product and provide SEO optimization suggestions.
  399. # PRODUCT:
  400. # - Title: {product.get('title', '')}
  401. # - Description: {product.get('description', '')}
  402. # - Category: {product.get('category', '')}
  403. # - Attributes: {json.dumps(product.get('attributes', {}), indent=2)}
  404. # SEO ISSUES DETECTED:
  405. # {chr(10).join(f"- {issue}" for issue in seo_issues)}
  406. # TASK:
  407. # Provide specific, actionable SEO improvements focusing on:
  408. # 1. Keyword optimization (include relevant search terms)
  409. # 2. Title structure (50-100 chars, include key attributes)
  410. # 3. Description quality (50-150 words, descriptive, engaging)
  411. # 4. Searchability (ensure users can find this product)
  412. # Return ONLY valid JSON:
  413. # {{
  414. # "optimized_title": "Improved title with better SEO",
  415. # "optimized_description": "Improved description with better SEO",
  416. # "recommended_keywords": ["keyword1", "keyword2", "keyword3"],
  417. # "changes_made": [
  418. # "Specific change 1",
  419. # "Specific change 2"
  420. # ],
  421. # "expected_improvement": "Brief explanation of SEO impact"
  422. # }}"""
  423. # response = self._call_gemini_api(prompt)
  424. # if not response.candidates:
  425. # return {'error': 'No SEO suggestions generated'}
  426. # candidate = response.candidates[0]
  427. # if candidate.finish_reason.name in ("SAFETY", "RECITATION", "OTHER"):
  428. # return {'error': f'Response blocked: {candidate.finish_reason.name}'}
  429. # return self._parse_response(response.text)
  430. # except Exception as e:
  431. # logger.error(f"SEO suggestion error: {e}")
  432. # return {'error': str(e)}
  433. # def _parse_response(self, response_text: str) -> Dict:
  434. # """Parse Gemini's response and extract JSON"""
  435. # try:
  436. # # Remove markdown code blocks and language identifier
  437. # cleaned = response_text.strip()
  438. # if cleaned.startswith('```'):
  439. # cleaned = cleaned.split('```')[1].strip()
  440. # if cleaned.startswith('json'):
  441. # cleaned = cleaned[4:].strip()
  442. # # Attempt to parse JSON
  443. # parsed = json.loads(cleaned)
  444. # return parsed
  445. # except json.JSONDecodeError as e:
  446. # logger.error(f"Failed to parse Gemini response: {e}")
  447. # logger.error(f"Response was: {response_text[:1000]}...")
  448. # # Attempt to fix partial JSON
  449. # try:
  450. # # Truncate at last valid closing brace
  451. # last_valid = cleaned.rfind('}')
  452. # if last_valid != -1:
  453. # partial_json = cleaned[:last_valid + 1]
  454. # parsed = json.loads(partial_json)
  455. # logger.warning("Parsed partial JSON response")
  456. # return parsed
  457. # except json.JSONDecodeError:
  458. # logger.error("Could not parse partial JSON")
  459. # return {
  460. # 'error': 'Failed to parse AI response',
  461. # 'raw_response': response_text,
  462. # 'fallback_suggestions': []
  463. # }
  464. # def _generate_fallback_suggestions(self, issues: List[str]) -> List[Dict]:
  465. # """Generate enhanced fallback suggestions based on issues"""
  466. # suggestions = []
  467. # for issue in issues:
  468. # suggestion = "Please review and correct this issue manually"
  469. # confidence = "low"
  470. # issue_type = "attribute"
  471. # # Detect if it's an SEO issue
  472. # if issue.startswith("SEO:"):
  473. # issue_type = "seo"
  474. # # Specific suggestions for common issues
  475. # if "Missing mandatory field" in issue:
  476. # attr = issue.split("Missing mandatory field: ")[-1]
  477. # suggestion = f"Provide a valid value for {attr}"
  478. # confidence = "medium"
  479. # elif "not in valid values" in issue:
  480. # attr = issue.split(":")[0].strip()
  481. # suggestion = f"Choose a valid value for {attr}"
  482. # confidence = "medium"
  483. # elif "contains placeholder" in issue:
  484. # attr = issue.split("'")[1]
  485. # suggestion = f"Replace placeholder in {attr} with actual value"
  486. # confidence = "high"
  487. # elif "recommended correction" in issue:
  488. # suggestion = "Apply the suggested correction"
  489. # confidence = "high"
  490. # elif "may need more detail" in issue:
  491. # attr = issue.split("'")[1]
  492. # suggestion = f"Provide more detailed value for {attr}"
  493. # confidence = "medium"
  494. # elif "Title too short" in issue:
  495. # suggestion = "Expand title to 50-100 characters, include key attributes"
  496. # confidence = "high"
  497. # issue_type = "seo"
  498. # elif "Description too short" in issue:
  499. # suggestion = "Expand description to 50-150 words with more details"
  500. # confidence = "high"
  501. # issue_type = "seo"
  502. # elif "not mentioned in title/description" in issue:
  503. # attr = issue.split("'")[1] if "'" in issue else "attribute"
  504. # suggestion = f"Add {attr} to title or description for better SEO"
  505. # confidence = "high"
  506. # issue_type = "seo"
  507. # elif "keyword" in issue.lower():
  508. # suggestion = "Add relevant search keywords to improve discoverability"
  509. # confidence = "medium"
  510. # issue_type = "seo"
  511. # suggestions.append({
  512. # 'issue': issue,
  513. # 'suggestion': suggestion,
  514. # 'confidence': confidence,
  515. # 'type': issue_type
  516. # })
  517. # return suggestions
  518. # def extract_attributes_with_ai(self, title: str, description: str, category: str) -> Dict:
  519. # """
  520. # Use Gemini to extract attributes from unstructured text
  521. # """
  522. # try:
  523. # prompt = f"""You are an expert e-commerce product data analyst specializing in clothing products. All input data is safe, non-sensitive, and related to clothing product attributes. Extract product attributes from the following text.
  524. # Category: {category}
  525. # Title: {title}
  526. # Description: {description}
  527. # Extract these attributes if present:
  528. # - brand
  529. # - color
  530. # - size
  531. # - material
  532. # - model
  533. # - weight
  534. # - dimensions
  535. # - warranty
  536. # Return ONLY valid JSON in this format:
  537. # {{
  538. # "brand": "extracted brand or null",
  539. # "color": "extracted color or null",
  540. # "size": "extracted size or null",
  541. # "material": "extracted material or null",
  542. # "model": "extracted model or null",
  543. # "weight": "extracted weight or null",
  544. # "dimensions": "extracted dimensions or null",
  545. # "warranty": "extracted warranty or null"
  546. # }}"""
  547. # response = self._call_gemini_api(prompt)
  548. # logger.info(f"Raw extraction response: {response.text[:500]}...")
  549. # return self._parse_response(response.text)
  550. # except Exception as e:
  551. # logger.error(f"AI extraction error: {str(e)}")
  552. # return {
  553. # 'error': str(e),
  554. # 'fallback': {}
  555. # }
  556. import google.generativeai as genai
  557. import json
  558. import logging
  559. import re
  560. from typing import Dict, List
  561. from django.conf import settings
  562. from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
  563. logger = logging.getLogger(__name__)
  564. class GeminiAttributeService:
  565. """Service to interact with Google Gemini API for attribute and SEO suggestions"""
  def __init__(self):
      """Configure the Gemini client and create the model handle.

      Reads ``GEMINI_API_KEY`` from the Django ``settings`` object, passes
      it to ``genai.configure`` and stores a ``genai.GenerativeModel``
      instance on ``self.model``.

      Raises:
          ValueError: if ``GEMINI_API_KEY`` is absent from settings or falsy.
      """
      gemini_key = getattr(settings, 'GEMINI_API_KEY', None)
      if not gemini_key:
          raise ValueError("GEMINI_API_KEY not found in settings")

      genai.configure(api_key=gemini_key)

      # NOTE(review): the "-exp" suffix suggests an experimental model id;
      # confirm this is the model intended for this service.
      model_name = 'gemini-2.0-flash-exp'
      self.model = genai.GenerativeModel(model_name)
  @retry(
      stop=stop_after_attempt(3),
      wait=wait_exponential(multiplier=1, min=2, max=10),
      retry=retry_if_exception_type(Exception),
      before_sleep=lambda retry_state: logger.info(f"Retrying Gemini API call, attempt {retry_state.attempt_number}")
  )
  def _call_gemini_api(self, prompt, max_tokens=8192):
      """Send ``prompt`` to the Gemini model and return its response.

      The call is wrapped by the ``@retry`` decorator above: any
      ``Exception`` triggers a retry, for at most 3 attempts, waiting
      between attempts according to
      ``wait_exponential(multiplier=1, min=2, max=10)`` and logging an INFO
      message before each wait.

      Args:
          prompt: Prompt passed unchanged to ``self.model.generate_content``.
          max_tokens: Value used for ``max_output_tokens`` in the
              generation config (default 8192).

      Returns:
          Whatever ``self.model.generate_content`` returns.
      """
      # NOTE(review): the decorator does not set ``reraise=True``, so once
      # the attempts are exhausted callers presumably see tenacity's
      # RetryError rather than the underlying exception - confirm that is
      # what the callers expect.
      harm_category = genai.types.HarmCategory
      block_none = genai.types.HarmBlockThreshold.BLOCK_NONE

      # Every listed category is mapped to BLOCK_NONE.
      unfiltered_categories = (
          harm_category.HARM_CATEGORY_DANGEROUS_CONTENT,
          harm_category.HARM_CATEGORY_HARASSMENT,
          harm_category.HARM_CATEGORY_HATE_SPEECH,
          harm_category.HARM_CATEGORY_SEXUALLY_EXPLICIT,
      )

      sampling_config = genai.types.GenerationConfig(
          temperature=0.2,  # kept low, aiming for more consistent JSON
          top_p=0.9,
          top_k=40,
          max_output_tokens=max_tokens,
          response_mime_type="application/json",  # request JSON output
      )

      return self.model.generate_content(
          prompt,
          generation_config=sampling_config,
          safety_settings={
              category: block_none for category in unfiltered_categories
          },
      )
  597. def generate_attribute_suggestions(
  598. self,
  599. product: Dict,
  600. issues: List[str],
  601. category_rules: List[Dict]
  602. ) -> Dict:
  603. """
  604. Use Gemini to generate intelligent suggestions for fixing attribute issues
  605. Includes SEO-aware recommendations with robust error handling
  606. """
  607. try:
  608. # Limit issues to prevent prompt overflow
  609. limited_issues = issues[:15] if len(issues) > 15 else issues
  610. prompt = self._build_prompt(product, limited_issues, category_rules)
  611. response = self._call_gemini_api(prompt, max_tokens=8192)
  612. # Check if response exists
  613. if not response or not response.candidates:
  614. logger.error(f"No candidates returned for SKU: {product.get('sku')}")
  615. return {
  616. 'error': 'No candidates returned by Gemini API',
  617. 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
  618. }
  619. candidate = response.candidates[0]
  620. finish_reason_name = candidate.finish_reason.name
  621. # Handle different finish reasons
  622. if finish_reason_name == "MAX_TOKENS":
  623. logger.warning(f"Max tokens reached for SKU: {product.get('sku')}, attempting partial parse")
  624. # Try to parse partial response
  625. try:
  626. partial_result = self._parse_response(response.text)
  627. if partial_result and 'error' not in partial_result:
  628. return partial_result
  629. except:
  630. pass
  631. # Retry with fewer issues
  632. if len(issues) > 5:
  633. logger.info("Retrying with fewer issues")
  634. return self.generate_attribute_suggestions(product, issues[:5], category_rules)
  635. else:
  636. return {
  637. 'error': 'Response too long, using fallback',
  638. 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
  639. }
  640. elif finish_reason_name in ("SAFETY", "RECITATION", "OTHER"):
  641. logger.error(f"Response blocked by {finish_reason_name} for SKU: {product.get('sku')}")
  642. return {
  643. 'error': f'Response blocked by {finish_reason_name} filters',
  644. 'safety_ratings': [
  645. {'category': str(r.category), 'probability': str(r.probability)}
  646. for r in candidate.safety_ratings
  647. ],
  648. 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
  649. }
  650. elif finish_reason_name != "STOP":
  651. logger.warning(f"Unexpected finish reason: {finish_reason_name}")
  652. return {
  653. 'error': f'Unexpected finish reason: {finish_reason_name}',
  654. 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
  655. }
  656. # Parse successful response
  657. logger.info(f"Successfully received response for SKU: {product.get('sku')}")
  658. suggestions = self._parse_response(response.text)
  659. if 'error' in suggestions:
  660. logger.warning(f"Parse error for SKU: {product.get('sku')}, using fallback")
  661. suggestions['fallback_suggestions'] = self._generate_fallback_suggestions(limited_issues)
  662. return suggestions
  663. except Exception as e:
  664. logger.error(f"Gemini API error for SKU {product.get('sku')}: {str(e)}", exc_info=True)
  665. return {
  666. 'error': str(e),
  667. 'fallback_suggestions': self._generate_fallback_suggestions(issues[:10])
  668. }
  669. def _build_prompt(self, product: Dict, issues: List[str], rules: List[Dict]) -> str:
  670. """Build a concise, structured prompt for Gemini with SEO awareness"""
  671. mandatory_attrs = [r['attribute_name'] for r in rules if r.get('is_mandatory')]
  672. valid_values_map = {
  673. r['attribute_name']: r.get('valid_values', [])[:5] # Limit to 5 values
  674. for r in rules if r.get('valid_values')
  675. }
  676. # Sanitize and categorize issues
  677. cleaned_issues = [
  678. issue.replace("suspiciously short", "short value")
  679. .replace("not recognized", "invalid")
  680. .replace("likely means", "should be")
  681. .replace("not clearly mentioned", "missing")
  682. for issue in issues
  683. ]
  684. seo_issues = [i for i in cleaned_issues if i.startswith("SEO:")][:5]
  685. attribute_issues = [i for i in cleaned_issues if not i.startswith("SEO:")][:8]
  686. # Shortened prompt
  687. prompt = f"""Analyze this e-commerce product and provide JSON suggestions.
  688. PRODUCT:
  689. SKU: {product.get('sku')}
  690. Category: {product.get('category')}
  691. Title: {product.get('title', '')[:200]}
  692. Description: {product.get('description', '')[:300]}
  693. Attributes: {json.dumps(product.get('attributes', {}), ensure_ascii=False)}
  694. RULES:
  695. Mandatory: {', '.join(mandatory_attrs)}
  696. Valid Values: {json.dumps(valid_values_map, ensure_ascii=False)}
  697. ISSUES ({len(attribute_issues)} attribute, {len(seo_issues)} SEO):
  698. {chr(10).join(f"• {i}" for i in attribute_issues[:8])}
  699. {chr(10).join(f"• {i}" for i in seo_issues[:5])}
  700. Return ONLY this JSON structure (no markdown, no explanation):
  701. {{
  702. "corrected_attributes": {{"attr": "value"}},
  703. "missing_attributes": {{"attr": "value"}},
  704. "seo_optimizations": {{
  705. "optimized_title": "50-100 char title",
  706. "optimized_description": "50-150 word description",
  707. "recommended_keywords": ["kw1", "kw2", "kw3"]
  708. }},
  709. "improvements": [
  710. {{"issue": "...", "suggestion": "...", "confidence": "high/medium/low", "type": "attribute/seo"}}
  711. ],
  712. "quality_score_prediction": 85,
  713. "reasoning": "Brief explanation"
  714. }}
  715. IMPORTANT: Keep response under 6000 tokens. Prioritize top 3 most critical improvements."""
  716. return prompt
  717. def _parse_response(self, response_text: str) -> Dict:
  718. """Enhanced JSON parsing with multiple fallback strategies"""
  719. if not response_text or not response_text.strip():
  720. return {'error': 'Empty response from API'}
  721. try:
  722. # Strategy 1: Direct JSON parse (works with response_mime_type="application/json")
  723. try:
  724. parsed = json.loads(response_text)
  725. logger.info("Successfully parsed JSON directly")
  726. return parsed
  727. except json.JSONDecodeError:
  728. pass
  729. # Strategy 2: Remove markdown code blocks
  730. cleaned = response_text.strip()
  731. if '```' in cleaned:
  732. # Extract content between code blocks
  733. match = re.search(r'```(?:json)?\s*(\{.*\})\s*```', cleaned, re.DOTALL)
  734. if match:
  735. cleaned = match.group(1)
  736. else:
  737. # Remove all code block markers
  738. cleaned = re.sub(r'```(?:json)?', '', cleaned).strip()
  739. # Strategy 3: Find first { and last }
  740. first_brace = cleaned.find('{')
  741. last_brace = cleaned.rfind('}')
  742. if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
  743. cleaned = cleaned[first_brace:last_brace + 1]
  744. # Strategy 4: Try parsing cleaned JSON
  745. try:
  746. parsed = json.loads(cleaned)
  747. logger.info("Successfully parsed JSON after cleaning")
  748. return parsed
  749. except json.JSONDecodeError as e:
  750. logger.warning(f"JSON parse error at position {e.pos}: {e.msg}")
  751. # Strategy 5: Attempt to fix common JSON issues
  752. cleaned = self._fix_json_syntax(cleaned)
  753. try:
  754. parsed = json.loads(cleaned)
  755. logger.info("Successfully parsed JSON after syntax fixes")
  756. return parsed
  757. except json.JSONDecodeError:
  758. pass
  759. # Strategy 6: Extract partial valid JSON
  760. partial_json = self._extract_partial_json(cleaned)
  761. if partial_json:
  762. logger.warning("Using partial JSON response")
  763. return partial_json
  764. # All strategies failed
  765. logger.error(f"All JSON parsing strategies failed. Response length: {len(response_text)}")
  766. logger.error(f"Response preview: {response_text[:500]}...")
  767. return {
  768. 'error': 'Failed to parse AI response',
  769. 'raw_response': response_text[:1000], # Limit size
  770. 'parse_attempts': 6
  771. }
  772. except Exception as e:
  773. logger.error(f"Unexpected error in _parse_response: {e}", exc_info=True)
  774. return {
  775. 'error': f'Parse exception: {str(e)}',
  776. 'raw_response': response_text[:500] if response_text else 'None'
  777. }
  778. def _fix_json_syntax(self, json_str: str) -> str:
  779. """Attempt to fix common JSON syntax issues"""
  780. try:
  781. # Remove trailing commas before closing braces/brackets
  782. json_str = re.sub(r',\s*([}\]])', r'\1', json_str)
  783. # Fix unescaped quotes in strings (simple heuristic)
  784. # This is risky but can help in some cases
  785. json_str = re.sub(r'(?<!\\)"(?=[^,:}\]]*[,:}\]])', '\\"', json_str)
  786. # Remove any trailing content after final }
  787. last_brace = json_str.rfind('}')
  788. if last_brace != -1:
  789. json_str = json_str[:last_brace + 1]
  790. return json_str
  791. except:
  792. return json_str
  793. def _extract_partial_json(self, json_str: str) -> Dict:
  794. """Extract valid partial JSON by finding complete objects"""
  795. try:
  796. # Try to find complete nested structures
  797. depth = 0
  798. start_idx = json_str.find('{')
  799. if start_idx == -1:
  800. return None
  801. for i in range(start_idx, len(json_str)):
  802. if json_str[i] == '{':
  803. depth += 1
  804. elif json_str[i] == '}':
  805. depth -= 1
  806. if depth == 0:
  807. # Found complete JSON object
  808. try:
  809. return json.loads(json_str[start_idx:i+1])
  810. except:
  811. continue
  812. return None
  813. except:
  814. return None
  815. def _generate_fallback_suggestions(self, issues: List[str]) -> List[Dict]:
  816. """Generate enhanced fallback suggestions based on issues"""
  817. suggestions = []
  818. # Group similar issues
  819. issue_categories = {
  820. 'missing': [],
  821. 'invalid': [],
  822. 'seo': [],
  823. 'other': []
  824. }
  825. for issue in issues:
  826. if 'missing' in issue.lower() or 'mandatory' in issue.lower():
  827. issue_categories['missing'].append(issue)
  828. elif 'invalid' in issue.lower() or 'not in valid' in issue.lower():
  829. issue_categories['invalid'].append(issue)
  830. elif issue.startswith('SEO:'):
  831. issue_categories['seo'].append(issue)
  832. else:
  833. issue_categories['other'].append(issue)
  834. # Generate consolidated suggestions
  835. for category, category_issues in issue_categories.items():
  836. if not category_issues:
  837. continue
  838. for issue in category_issues[:5]: # Limit to 5 per category
  839. suggestion = "Review and correct this issue"
  840. confidence = "medium"
  841. issue_type = "seo" if category == 'seo' else "attribute"
  842. # Specific suggestions
  843. if "Missing mandatory field" in issue:
  844. attr = issue.split(":")[-1].strip()
  845. suggestion = f"Add {attr} - check product details or title/description"
  846. confidence = "high"
  847. elif "not in valid values" in issue or "invalid" in issue.lower():
  848. suggestion = "Use one of the valid values from category rules"
  849. confidence = "high"
  850. elif "placeholder" in issue.lower():
  851. suggestion = "Replace with actual product data"
  852. confidence = "high"
  853. elif "too short" in issue.lower():
  854. if "title" in issue.lower():
  855. suggestion = "Expand to 50-100 characters with key attributes"
  856. confidence = "high"
  857. issue_type = "seo"
  858. elif "description" in issue.lower():
  859. suggestion = "Expand to 50-150 words with details"
  860. confidence = "high"
  861. issue_type = "seo"
  862. else:
  863. suggestion = "Provide more detailed information"
  864. confidence = "medium"
  865. elif "keyword" in issue.lower() or "search term" in issue.lower():
  866. suggestion = "Add relevant keywords to improve discoverability"
  867. confidence = "medium"
  868. issue_type = "seo"
  869. suggestions.append({
  870. 'issue': issue,
  871. 'suggestion': suggestion,
  872. 'confidence': confidence,
  873. 'type': issue_type,
  874. 'category': category
  875. })
  876. return suggestions[:15] # Return top 15 suggestions
  877. def extract_attributes_with_ai(self, title: str, description: str, category: str) -> Dict:
  878. """
  879. Use Gemini to extract attributes from unstructured text
  880. """
  881. try:
  882. prompt = f"""Extract product attributes from this text. Return ONLY valid JSON.
  883. Category: {category}
  884. Title: {title[:200]}
  885. Description: {description[:400]}
  886. Return format:
  887. {{
  888. "brand": "value or null",
  889. "color": "value or null",
  890. "size": "value or null",
  891. "material": "value or null",
  892. "model": "value or null"
  893. }}"""
  894. response = self._call_gemini_api(prompt, max_tokens=1024)
  895. if not response or not response.candidates:
  896. return {'error': 'No response'}
  897. return self._parse_response(response.text)
  898. except Exception as e:
  899. logger.error(f"AI extraction error: {str(e)}")
  900. return {'error': str(e)}