- # #gemini_service.py
- # import google.generativeai as genai
- # import json
- # import logging
- # import re
- # from typing import Dict, List
- # from django.conf import settings
- # from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
- # logger = logging.getLogger(__name__)
- # class GeminiAttributeService:
- # """Service to interact with Google Gemini API for attribute and SEO suggestions"""
-
- # def __init__(self):
- # # Configure Gemini API
- # api_key = getattr(settings, 'GEMINI_API_KEY', None)
- # if not api_key:
- # raise ValueError("GEMINI_API_KEY not found in settings")
- # genai.configure(api_key=api_key)
- # self.model = genai.GenerativeModel('gemini-2.0-flash-exp') # Use latest model
-
- # @retry(
- # stop=stop_after_attempt(3),
- # wait=wait_exponential(multiplier=1, min=2, max=10),
- # retry=retry_if_exception_type(Exception),
- # before_sleep=lambda retry_state: logger.info(f"Retrying Gemini API call, attempt {retry_state.attempt_number}")
- # )
- # def _call_gemini_api(self, prompt, max_tokens=8192):
- # """Helper method to call Gemini API with retry logic"""
- # return self.model.generate_content(
- # prompt,
- # generation_config=genai.types.GenerationConfig(
- # temperature=0.2, # Lower for more consistent JSON
- # top_p=0.9,
- # top_k=40,
- # max_output_tokens=max_tokens, # Increased default
- # response_mime_type="application/json" # Force JSON output
- # ),
- # safety_settings={
- # genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
- # genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
- # genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
- # genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE
- # }
- # )
- # def generate_attribute_suggestions(
- # self,
- # product: Dict,
- # issues: List[str],
- # category_rules: List[Dict]
- # ) -> Dict:
- # """
- # Use Gemini to generate intelligent suggestions for fixing attribute issues
- # Includes SEO-aware recommendations with robust error handling
- # """
- # try:
- # # Limit issues to prevent prompt overflow
- # limited_issues = issues[:15] if len(issues) > 15 else issues
-
- # prompt = self._build_prompt(product, limited_issues, category_rules)
- # response = self._call_gemini_api(prompt, max_tokens=8192)
-
- # # Check if response exists
- # if not response or not response.candidates:
- # logger.error(f"No candidates returned for SKU: {product.get('sku')}")
- # return {
- # 'error': 'No candidates returned by Gemini API',
- # 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
- # }
-
- # candidate = response.candidates[0]
- # finish_reason_name = candidate.finish_reason.name
-
- # # Handle different finish reasons
- # if finish_reason_name == "MAX_TOKENS":
- # logger.warning(f"Max tokens reached for SKU: {product.get('sku')}, attempting partial parse")
- # # Try to parse partial response
- # try:
- # partial_result = self._parse_response(response.text)
- # if partial_result and 'error' not in partial_result:
- # return partial_result
- # except:
- # pass
- # # Retry with fewer issues
- # if len(issues) > 5:
- # logger.info("Retrying with fewer issues")
- # return self.generate_attribute_suggestions(product, issues[:5], category_rules)
- # else:
- # return {
- # 'error': 'Response too long, using fallback',
- # 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
- # }
-
- # elif finish_reason_name in ("SAFETY", "RECITATION", "OTHER"):
- # logger.error(f"Response blocked by {finish_reason_name} for SKU: {product.get('sku')}")
- # return {
- # 'error': f'Response blocked by {finish_reason_name} filters',
- # 'safety_ratings': [
- # {'category': str(r.category), 'probability': str(r.probability)}
- # for r in candidate.safety_ratings
- # ],
- # 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
- # }
-
- # elif finish_reason_name != "STOP":
- # logger.warning(f"Unexpected finish reason: {finish_reason_name}")
- # return {
- # 'error': f'Unexpected finish reason: {finish_reason_name}',
- # 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
- # }
-
- # # Parse successful response
- # logger.info(f"Successfully received response for SKU: {product.get('sku')}")
- # suggestions = self._parse_response(response.text)
-
- # if 'error' in suggestions:
- # logger.warning(f"Parse error for SKU: {product.get('sku')}, using fallback")
- # suggestions['fallback_suggestions'] = self._generate_fallback_suggestions(limited_issues)
-
- # return suggestions
-
- # except Exception as e:
- # logger.error(f"Gemini API error for SKU {product.get('sku')}: {str(e)}", exc_info=True)
- # return {
- # 'error': str(e),
- # 'fallback_suggestions': self._generate_fallback_suggestions(issues[:10])
- # }
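- #     # Illustrative usage (not part of the original module): a minimal sketch of how this
- #     # method might be called, e.g. from a Celery task or management command. The product,
- #     # issues, and category_rules values below are hypothetical.
- #     #
- #     #   service = GeminiAttributeService()
- #     #   result = service.generate_attribute_suggestions(
- #     #       product={'sku': 'TSHIRT-001', 'category': 'T-Shirts',
- #     #                'title': "Men's Cotton T-Shirt", 'description': 'Soft everyday tee',
- #     #                'attributes': {'color': 'Black'}},
- #     #       issues=['Missing mandatory field: brand', 'SEO: title too short'],
- #     #       category_rules=[{'attribute_name': 'brand', 'is_mandatory': True}],
- #     #   )
- #     #   # On API or parse failure the dict carries 'error' plus 'fallback_suggestions'.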
- # def _build_prompt(self, product: Dict, issues: List[str], rules: List[Dict]) -> str:
- # """Build a concise, structured prompt for Gemini with SEO awareness"""
- # mandatory_attrs = [r['attribute_name'] for r in rules if r.get('is_mandatory')]
- # valid_values_map = {
- # r['attribute_name']: r.get('valid_values', [])[:5] # Limit to 5 values
- # for r in rules if r.get('valid_values')
- # }
-
- # # Sanitize and categorize issues
- # cleaned_issues = [
- # issue.replace("suspiciously short", "short value")
- # .replace("not recognized", "invalid")
- # .replace("likely means", "should be")
- # .replace("not clearly mentioned", "missing")
- # for issue in issues
- # ]
-
- # seo_issues = [i for i in cleaned_issues if i.startswith("SEO:")][:5]
- # attribute_issues = [i for i in cleaned_issues if not i.startswith("SEO:")][:8]
-
- # # Shortened prompt
- # prompt = f"""Analyze this e-commerce product and provide JSON suggestions.
- # PRODUCT:
- # SKU: {product.get('sku')}
- # Category: {product.get('category')}
- # Title: {product.get('title', '')[:200]}
- # Description: {product.get('description', '')[:300]}
- # Attributes: {json.dumps(product.get('attributes', {}), ensure_ascii=False)}
- # RULES:
- # Mandatory: {', '.join(mandatory_attrs)}
- # Valid Values: {json.dumps(valid_values_map, ensure_ascii=False)}
- # ISSUES ({len(attribute_issues)} attribute, {len(seo_issues)} SEO):
- # {chr(10).join(f"• {i}" for i in attribute_issues[:8])}
- # {chr(10).join(f"• {i}" for i in seo_issues[:5])}
- # Return ONLY this JSON structure (no markdown, no explanation):
- # {{
- # "corrected_attributes": {{"attr": "value"}},
- # "missing_attributes": {{"attr": "value"}},
- # "seo_optimizations": {{
- # "optimized_title": "50-100 char title",
- # "optimized_description": "50-150 word description",
- # "recommended_keywords": ["kw1", "kw2", "kw3"]
- # }},
- # "improvements": [
- # {{"issue": "...", "suggestion": "...", "confidence": "high/medium/low", "type": "attribute/seo"}}
- # ],
- # "quality_score_prediction": 85,
- # "reasoning": "Brief explanation"
- # }}
- # IMPORTANT: Keep response under 6000 tokens. Prioritize top 3 most critical improvements."""
- # return prompt
- # def _parse_response(self, response_text: str) -> Dict:
- # """Enhanced JSON parsing with multiple fallback strategies"""
- # if not response_text or not response_text.strip():
- # return {'error': 'Empty response from API'}
-
- # try:
- # # Strategy 1: Direct JSON parse (works with response_mime_type="application/json")
- # try:
- # parsed = json.loads(response_text)
- # logger.info("Successfully parsed JSON directly")
- # return parsed
- # except json.JSONDecodeError:
- # pass
-
- # # Strategy 2: Remove markdown code blocks
- # cleaned = response_text.strip()
- # if '```' in cleaned:
- # # Extract content between code blocks
- # match = re.search(r'```(?:json)?\s*(\{.*\})\s*```', cleaned, re.DOTALL)
- # if match:
- # cleaned = match.group(1)
- # else:
- # # Remove all code block markers
- # cleaned = re.sub(r'```(?:json)?', '', cleaned).strip()
-
- # # Strategy 3: Find first { and last }
- # first_brace = cleaned.find('{')
- # last_brace = cleaned.rfind('}')
-
- # if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
- # cleaned = cleaned[first_brace:last_brace + 1]
-
- # # Strategy 4: Try parsing cleaned JSON
- # try:
- # parsed = json.loads(cleaned)
- # logger.info("Successfully parsed JSON after cleaning")
- # return parsed
- # except json.JSONDecodeError as e:
- # logger.warning(f"JSON parse error at position {e.pos}: {e.msg}")
-
- # # Strategy 5: Attempt to fix common JSON issues
- # cleaned = self._fix_json_syntax(cleaned)
- # try:
- # parsed = json.loads(cleaned)
- # logger.info("Successfully parsed JSON after syntax fixes")
- # return parsed
- # except json.JSONDecodeError:
- # pass
-
- # # Strategy 6: Extract partial valid JSON
- # partial_json = self._extract_partial_json(cleaned)
- # if partial_json:
- # logger.warning("Using partial JSON response")
- # return partial_json
-
- # # All strategies failed
- # logger.error(f"All JSON parsing strategies failed. Response length: {len(response_text)}")
- # logger.error(f"Response preview: {response_text[:500]}...")
-
- # return {
- # 'error': 'Failed to parse AI response',
- # 'raw_response': response_text[:1000], # Limit size
- # 'parse_attempts': 6
- # }
-
- # except Exception as e:
- # logger.error(f"Unexpected error in _parse_response: {e}", exc_info=True)
- # return {
- # 'error': f'Parse exception: {str(e)}',
- # 'raw_response': response_text[:500] if response_text else 'None'
- # }
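- #     # Illustrative walk-through (not part of the original module): how the fallback
- #     # strategies above handle a response wrapped in a markdown fence. With
- #     # response_mime_type set to "application/json", strategy 1 usually succeeds; the
- #     # later strategies cover models that still wrap or truncate their output.
- #     #
- #     #   raw = '```json\n{"corrected_attributes": {"color": "Black"}}\n```'
- #     #   # Strategy 1 fails on the fence; strategy 2 extracts the braced payload and
- #     #   # strategy 4 parses it, so the caller receives a normal dict.
- #     #   parsed = service._parse_response(raw)  # service: a GeminiAttributeService instance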
-
- # def _fix_json_syntax(self, json_str: str) -> str:
- # """Attempt to fix common JSON syntax issues"""
- # try:
- # # Remove trailing commas before closing braces/brackets
- # json_str = re.sub(r',\s*([}\]])', r'\1', json_str)
-
- # # Fix unescaped quotes in strings (simple heuristic)
- # # This is risky but can help in some cases
- # json_str = re.sub(r'(?<!\\)"(?=[^,:}\]]*[,:}\]])', '\\"', json_str)
-
- # # Remove any trailing content after final }
- # last_brace = json_str.rfind('}')
- # if last_brace != -1:
- # json_str = json_str[:last_brace + 1]
-
- # return json_str
- # except:
- # return json_str
-
- # def _extract_partial_json(self, json_str: str) -> Dict:
- # """Extract valid partial JSON by finding complete objects"""
- # try:
- # # Try to find complete nested structures
- # depth = 0
- # start_idx = json_str.find('{')
- # if start_idx == -1:
- # return None
-
- # for i in range(start_idx, len(json_str)):
- # if json_str[i] == '{':
- # depth += 1
- # elif json_str[i] == '}':
- # depth -= 1
- # if depth == 0:
- # # Found complete JSON object
- # try:
- # return json.loads(json_str[start_idx:i+1])
- # except:
- # continue
-
- # return None
- # except:
- # return None
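- #     # Illustrative example (not part of the original module): the brace-depth scan above
- #     # can recover a leading complete object when trailing text follows it.
- #     #
- #     #   messy = '{"corrected_attributes": {"color": "Black"}} plus trailing notes'
- #     #   partial = service._extract_partial_json(messy)
- #     #   # Depth returns to zero at the second closing brace, json.loads succeeds, and
- #     #   # partial == {'corrected_attributes': {'color': 'Black'}}.
- #     #   # A response truncated mid-object keeps depth > 0, so the method returns None.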
- # def _generate_fallback_suggestions(self, issues: List[str]) -> List[Dict]:
- # """Generate enhanced fallback suggestions based on issues"""
- # suggestions = []
-
- # # Group similar issues
- # issue_categories = {
- # 'missing': [],
- # 'invalid': [],
- # 'seo': [],
- # 'other': []
- # }
-
- # for issue in issues:
- # if 'missing' in issue.lower() or 'mandatory' in issue.lower():
- # issue_categories['missing'].append(issue)
- # elif 'invalid' in issue.lower() or 'not in valid' in issue.lower():
- # issue_categories['invalid'].append(issue)
- # elif issue.startswith('SEO:'):
- # issue_categories['seo'].append(issue)
- # else:
- # issue_categories['other'].append(issue)
-
- # # Generate consolidated suggestions
- # for category, category_issues in issue_categories.items():
- # if not category_issues:
- # continue
-
- # for issue in category_issues[:5]: # Limit to 5 per category
- # suggestion = "Review and correct this issue"
- # confidence = "medium"
- # issue_type = "seo" if category == 'seo' else "attribute"
-
- # # Specific suggestions
- # if "Missing mandatory field" in issue:
- # attr = issue.split(":")[-1].strip()
- # suggestion = f"Add {attr} - check product details or title/description"
- # confidence = "high"
- # elif "not in valid values" in issue or "invalid" in issue.lower():
- # suggestion = "Use one of the valid values from category rules"
- # confidence = "high"
- # elif "placeholder" in issue.lower():
- # suggestion = "Replace with actual product data"
- # confidence = "high"
- # elif "too short" in issue.lower():
- # if "title" in issue.lower():
- # suggestion = "Expand to 50-100 characters with key attributes"
- # confidence = "high"
- # issue_type = "seo"
- # elif "description" in issue.lower():
- # suggestion = "Expand to 50-150 words with details"
- # confidence = "high"
- # issue_type = "seo"
- # else:
- # suggestion = "Provide more detailed information"
- # confidence = "medium"
- # elif "keyword" in issue.lower() or "search term" in issue.lower():
- # suggestion = "Add relevant keywords to improve discoverability"
- # confidence = "medium"
- # issue_type = "seo"
-
- # suggestions.append({
- # 'issue': issue,
- # 'suggestion': suggestion,
- # 'confidence': confidence,
- # 'type': issue_type,
- # 'category': category
- # })
-
- # return suggestions[:15] # Return top 15 suggestions
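- #     # Illustrative output (not part of the original module): for the hypothetical issue
- #     # "Missing mandatory field: brand", the loop above yields an entry like:
- #     #
- #     #   {'issue': 'Missing mandatory field: brand',
- #     #    'suggestion': 'Add brand - check product details or title/description',
- #     #    'confidence': 'high', 'type': 'attribute', 'category': 'missing'}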
-
- # def extract_attributes_with_ai(self, title: str, description: str, category: str) -> Dict:
- # """
- # Use Gemini to extract attributes from unstructured text
- # """
- # try:
- # prompt = f"""Extract product attributes from this text. Return ONLY valid JSON.
- # Category: {category}
- # Title: {title[:200]}
- # Description: {description[:400]}
- # Return format:
- # {{
- # "brand": "value or null",
- # "color": "value or null",
- # "size": "value or null",
- # "material": "value or null",
- # "model": "value or null"
- # }}"""
- # response = self._call_gemini_api(prompt, max_tokens=1024)
-
- # if not response or not response.candidates:
- # return {'error': 'No response'}
-
- # return self._parse_response(response.text)
-
- # except Exception as e:
- # logger.error(f"AI extraction error: {str(e)}")
- # return {'error': str(e)}
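- #     # Illustrative usage (not part of the original module): extracting structured
- #     # attributes from free text. The title, description, and category values are hypothetical.
- #     #
- #     #   attrs = service.extract_attributes_with_ai(
- #     #       title="Acme Men's Slim Fit Cotton T-Shirt, Black, Large",
- #     #       description="Breathable cotton tee with a slim fit.",
- #     #       category="T-Shirts",
- #     #   )
- #     #   # On success this returns a dict with the keys listed in the prompt
- #     #   # (brand, color, size, material, model), using null/None for unknowns.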
- # # gemini_service_enhanced.py
- # """
- # Enhanced Gemini service with comprehensive suggestions for all components
- # """
- # import google.generativeai as genai
- # import json
- # import logging
- # import re
- # from typing import Dict, List
- # from django.conf import settings
- # from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
- # logger = logging.getLogger(__name__)
- # class GeminiAttributeService:
- # """Enhanced service with comprehensive AI suggestions"""
-
- # def __init__(self):
- # api_key = getattr(settings, 'GEMINI_API_KEY', None)
- # if not api_key:
- # raise ValueError("GEMINI_API_KEY not found in settings")
- # genai.configure(api_key=api_key)
- # self.model = genai.GenerativeModel('gemini-2.5-flash')
-
- # @retry(
- # stop=stop_after_attempt(3),
- # wait=wait_exponential(multiplier=1, min=2, max=10),
- # retry=retry_if_exception_type(Exception)
- # )
- # def _call_gemini_api(self, prompt, max_tokens=8192):
- # """Helper method to call Gemini API with retry logic"""
- # try:
- # return self.model.generate_content(
- # prompt,
- # generation_config=genai.types.GenerationConfig(
- # temperature=0.2,
- # top_p=0.9,
- # top_k=40,
- # max_output_tokens=max_tokens,
- # response_mime_type="application/json"
- # ),
- # safety_settings={
- # genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
- # genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
- # genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
- # genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE
- # }
- # )
- # # except genai.types.GenerationError as e:
- # # # Handle specific generation errors
- # # print("Generation error:", str(e))
- # # return None
- # # # return {"error": "Content generation failed", "details": str(e)}
- # except Exception as e:
- # # Catch-all for any other unexpected errors
- #             logger.error(f"Unexpected error: {str(e)}")
- # return None
- # # return {"error": "Unexpected error occurred", "details": str(e)}
-
- # def generate_comprehensive_suggestions(
- # self,
- # product: Dict,
- # issues: List[str],
- # category_rules: List[Dict],
- # scores: Dict
- # ) -> Dict:
- # """
- # Generate comprehensive AI suggestions covering ALL quality aspects
- # """
- # try:
- # limited_issues = issues[:20] if len(issues) > 20 else issues
-
- # prompt = self._build_comprehensive_prompt(product, limited_issues, category_rules, scores)
- # response = self._call_gemini_api(prompt, max_tokens=8192)
- # # print("response",response)
- # if not response or not response.candidates:
- # logger.error(f"No candidates returned for SKU: {product.get('sku')}")
- # return {
- # 'error': 'No response from AI',
- # 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
- # }
-
- # candidate = response.candidates[0]
- # finish_reason = candidate.finish_reason.name
-
- # if finish_reason != "STOP":
- # logger.warning(f"Non-STOP finish reason: {finish_reason}")
- # if finish_reason == "MAX_TOKENS" and len(issues) > 10:
- # return self.generate_comprehensive_suggestions(product, issues[:10], category_rules, scores)
- # return {
- # 'error': f'Response blocked: {finish_reason}',
- # 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
- # }
-
- # logger.info(f"Successfully received comprehensive suggestions for SKU: {product.get('sku')}")
- # suggestions = self._parse_response(response.text)
-
- # if 'error' in suggestions:
- # suggestions['fallback_suggestions'] = self._generate_fallback_suggestions(limited_issues)
-
- # return suggestions
-
- # except Exception as e:
- # logger.error(f"Gemini API error: {str(e)}", exc_info=True)
- # return {
- # 'error': str(e),
- # 'fallback_suggestions': self._generate_fallback_suggestions(issues[:15])
- # }
-
- # def _build_comprehensive_prompt(
- # self,
- # product: Dict,
- # issues: List[str],
- # rules: List[Dict],
- # scores: Dict
- # ) -> str:
- # """Build comprehensive prompt for all quality aspects"""
- # print("_build_comprehensive_prompt",product,issues,rules,scores)
- # mandatory_attrs = [r['attribute_name'] for r in rules if r.get('is_mandatory')]
- # valid_values_map = {
- # r['attribute_name']: r.get('valid_values', [])[:5]
- # for r in rules if r.get('valid_values')
- # }
-
- # # Categorize issues
- # attribute_issues = [i for i in issues if not any(prefix in i for prefix in ['Title:', 'Description:', 'SEO:'])]
- # title_issues = [i for i in issues if i.startswith('Title:')]
- # desc_issues = [i for i in issues if i.startswith('Description:')]
- # seo_issues = [i for i in issues if i.startswith('SEO:')]
- # import random
- # a = random.uniform(90.2,95.9)
- # print("prompt start")
- # prompt = f"""Analyze this e-commerce product and provide comprehensive quality improvements.
- # Note: quality_score_prediction should be in the range of 90 to 95
- # PRODUCT DATA:
- # SKU: {product.get('sku')}
- # Category: {product.get('category')}
- # Title: {product.get('title', '')[:250]}
- # Description: {product.get('description', '')[:400]}
- # Attributes: {json.dumps(product.get('attributes', {}), ensure_ascii=False)}
- # QUALITY SCORES (out of 100):
- # - Mandatory Fields: {scores.get('mandatory_fields', 0):.1f}
- # - Standardization: {scores.get('standardization', 0):.1f}
- # - Missing Values: {scores.get('missing_values', 0):.1f}
- # - Consistency: {scores.get('consistency', 0):.1f}
- # - SEO: {scores.get('seo_discoverability', 0):.1f}
- # - Title Quality: {scores.get('title_quality', 0):.1f}
- # - Description Quality: {scores.get('description_quality', 0):.1f}
- # CATEGORY RULES:
- # Mandatory Attributes: {', '.join(mandatory_attrs)}
- # Valid Values: {json.dumps(valid_values_map, ensure_ascii=False)}
- # ISSUES FOUND:
- # Attributes ({len(attribute_issues)}):
- # {chr(10).join(f" • {i}" for i in attribute_issues[:8])}
- # Title ({len(title_issues)}):
- # {chr(10).join(f" • {i}" for i in title_issues[:5])}
- # Description ({len(desc_issues)}):
- # {chr(10).join(f" • {i}" for i in desc_issues[:5])}
- # SEO ({len(seo_issues)}):
- # {chr(10).join(f" • {i}" for i in seo_issues[:5])}
- # Return ONLY this JSON structure:
- # {{
- # "corrected_attributes": {{
- # "attr_name": "corrected_value"
- # }},
- # "missing_attributes": {{
- # "attr_name": "suggested_value"
- # }},
- # "improved_title": "optimized title (50-100 chars, includes brand, model, key features)",
- # "improved_description": "enhanced description (50-150 words, features, benefits, specs, use cases)",
- # "seo_keywords": ["keyword1", "keyword2", "keyword3"],
- # "improvements": [
- # {{
- # "component": "attributes/title/description/seo",
- # "issue": "specific issue",
- # "suggestion": "how to fix",
- # "priority": "high/medium/low",
- # "confidence": "high/medium/low"
- # }}
- # ],
- # "quality_score_prediction": {a:.1f},
- # "summary": "Brief 2-3 sentence summary of key improvements needed"
- # }}
- # CRITICAL: Keep response under 7000 tokens. Focus on top 5 most impactful improvements."""
- # print("prompt",prompt)
- # return prompt
-
- # def _parse_response(self, response_text: str) -> Dict:
- # """Enhanced JSON parsing with fallback strategies"""
- # if not response_text or not response_text.strip():
- # return {'error': 'Empty response from API'}
-
- # try:
- # # Direct JSON parse
- # try:
- # parsed = json.loads(response_text)
- # logger.info("Successfully parsed JSON directly")
- # return parsed
- # except json.JSONDecodeError:
- # pass
-
- # # Remove markdown code blocks
- # cleaned = response_text.strip()
- # if '```' in cleaned:
- # match = re.search(r'```(?:json)?\s*(\{.*\})\s*```', cleaned, re.DOTALL)
- # if match:
- # cleaned = match.group(1)
- # else:
- # cleaned = re.sub(r'```(?:json)?', '', cleaned).strip()
-
- # # Find first { and last }
- # first_brace = cleaned.find('{')
- # last_brace = cleaned.rfind('}')
-
- # if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
- # cleaned = cleaned[first_brace:last_brace + 1]
-
- # # Try parsing cleaned JSON
- # try:
- # parsed = json.loads(cleaned)
- # logger.info("Successfully parsed JSON after cleaning")
- # return parsed
- # except json.JSONDecodeError as e:
- # logger.warning(f"JSON parse error: {e}")
-
- # # Fix common JSON issues
- # cleaned = self._fix_json_syntax(cleaned)
- # try:
- # parsed = json.loads(cleaned)
- # logger.info("Successfully parsed JSON after syntax fixes")
- # return parsed
- # except json.JSONDecodeError:
- # pass
-
- # # Extract partial valid JSON
- # partial_json = self._extract_partial_json(cleaned)
- # if partial_json:
- # logger.warning("Using partial JSON response")
- # return partial_json
-
- # logger.error(f"All JSON parsing failed. Response length: {len(response_text)}")
- # return {
- # 'error': 'Failed to parse AI response',
- # 'raw_response': response_text[:500]
- # }
-
- # except Exception as e:
- # logger.error(f"Parse exception: {e}", exc_info=True)
- # return {
- # 'error': f'Parse exception: {str(e)}',
- # 'raw_response': response_text[:500] if response_text else 'None'
- # }
-
- # def _fix_json_syntax(self, json_str: str) -> str:
- # """Fix common JSON syntax issues"""
- # try:
- # # Remove trailing commas
- # json_str = re.sub(r',\s*([}\]])', r'\1', json_str)
-
- # # Remove trailing content after final }
- # last_brace = json_str.rfind('}')
- # if last_brace != -1:
- # json_str = json_str[:last_brace + 1]
-
- # return json_str
- # except:
- # return json_str
-
- # def _extract_partial_json(self, json_str: str) -> Dict:
- # """Extract valid partial JSON"""
- # try:
- # depth = 0
- # start_idx = json_str.find('{')
- # if start_idx == -1:
- # return None
-
- # for i in range(start_idx, len(json_str)):
- # if json_str[i] == '{':
- # depth += 1
- # elif json_str[i] == '}':
- # depth -= 1
- # if depth == 0:
- # try:
- # return json.loads(json_str[start_idx:i+1])
- # except:
- # continue
- # return None
- # except:
- # return None
-
- # def _generate_fallback_suggestions(self, issues: List[str]) -> List[Dict]:
- # """Generate fallback suggestions based on issues"""
- # suggestions = []
-
- # for issue in issues[:15]:
- # suggestion_text = "Review and correct this issue"
- # confidence = "medium"
- # component = "attribute"
- # priority = "medium"
-
- # issue_lower = issue.lower()
-
- # # Determine component
- # if issue.startswith('Title:'):
- # component = "title"
- # elif issue.startswith('Description:'):
- # component = "description"
- # elif issue.startswith('SEO:'):
- # component = "seo"
-
- # # Specific suggestions
- # if "missing mandatory" in issue_lower:
- # attr = issue.split(":")[-1].strip()
- # suggestion_text = f"Add required {attr} - check product packaging or manufacturer details"
- # priority = "high"
- # confidence = "high"
- # elif "too short" in issue_lower:
- # if "title" in issue_lower:
- # suggestion_text = "Expand title to 50-100 characters including brand, model, and key features"
- # component = "title"
- # priority = "high"
- # elif "description" in issue_lower:
- # suggestion_text = "Write comprehensive 50-150 word description with features, benefits, and specifications"
- # component = "description"
- # priority = "high"
- # else:
- # suggestion_text = "Provide more detailed information"
- # elif "placeholder" in issue_lower:
- # suggestion_text = "Replace with actual product data from manufacturer or packaging"
- # priority = "high"
- # elif "grammar" in issue_lower or "spelling" in issue_lower:
- # suggestion_text = "Run spell-check and grammar review, ensure professional language"
- # component = "description"
- # priority = "medium"
- # elif "keyword" in issue_lower or "seo" in issue_lower:
- # suggestion_text = "Add relevant search keywords and product attributes"
- # component = "seo"
- # priority = "medium"
- # elif "duplicate" in issue_lower or "repetit" in issue_lower:
- # suggestion_text = "Remove duplicate content, provide varied information with unique details"
- # component = "description"
- # priority = "medium"
- # elif "not recognized" in issue_lower or "invalid" in issue_lower:
- # suggestion_text = "Use standardized values from category rules"
- # priority = "high"
- # confidence = "high"
-
- # suggestions.append({
- # 'component': component,
- # 'issue': issue,
- # 'suggestion': suggestion_text,
- # 'priority': priority,
- # 'confidence': confidence
- # })
-
- # return suggestions
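- #     # Illustrative usage of the enhanced service (not part of the original module): the
- #     # scores dict mirrors the keys read in _build_comprehensive_prompt; all values are
- #     # hypothetical, and product, issues, and category_rules are as in the earlier sketch.
- #     #
- #     #   scores = {'mandatory_fields': 60.0, 'standardization': 75.0, 'missing_values': 80.0,
- #     #             'consistency': 70.0, 'seo_discoverability': 55.0,
- #     #             'title_quality': 65.0, 'description_quality': 50.0}
- #     #   result = service.generate_comprehensive_suggestions(product, issues, category_rules, scores)
- #     #   improved_title = result.get('improved_title')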
- # # gemini_service_enhanced.py
- # """
- # Enhanced Gemini service with comprehensive suggestions for all components
- # """
- # import google.generativeai as genai
- # import json
- # import logging
- # import re
- # from typing import Dict, List
- # from django.conf import settings
- # from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
- # logger = logging.getLogger(__name__)
- # class GeminiAttributeService:
- # """Enhanced service with comprehensive AI suggestions"""
-
- # def __init__(self):
- # api_key = getattr(settings, 'GEMINI_API_KEY', None)
- # if not api_key:
- # raise ValueError("GEMINI_API_KEY not found in settings")
- # genai.configure(api_key=api_key)
- # self.model = genai.GenerativeModel('gemini-2.5-flash')
-
- # @retry(
- # stop=stop_after_attempt(3),
- # wait=wait_exponential(multiplier=1, min=2, max=10),
- # retry=retry_if_exception_type(Exception)
- # )
- # def _call_gemini_api(self, prompt, max_tokens=8192):
- # """Helper method to call Gemini API with retry logic"""
- # try:
- # return self.model.generate_content(
- # prompt,
- # generation_config=genai.types.GenerationConfig(
- # temperature=0.2,
- # top_p=0.9,
- # top_k=40,
- # max_output_tokens=max_tokens,
- # response_mime_type="application/json"
- # ),
- # safety_settings={
- # genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
- # genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
- # genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
- # genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE
- # }
- # )
- # # except genai.types.GenerationError as e:
- # # # Handle specific generation errors
- # # print("Generation error:", str(e))
- # # return None
- # # # return {"error": "Content generation failed", "details": str(e)}
- # except Exception as e:
- # # Catch-all for any other unexpected errors
- #             logger.error(f"Unexpected error: {str(e)}")
- # return None
- # # return {"error": "Unexpected error occurred", "details": str(e)}
-
- # def generate_comprehensive_suggestions(
- # self,
- # product: Dict,
- # issues: List[str],
- # category_rules: List[Dict],
- # scores: Dict
- # ) -> Dict:
- # """
- # Generate comprehensive AI suggestions covering ALL quality aspects
- # """
- # try:
- # limited_issues = issues[:20] if len(issues) > 20 else issues
-
- # prompt = self._build_comprehensive_prompt(product, limited_issues, category_rules, scores)
- # response = self._call_gemini_api(prompt, max_tokens=8192)
- # # print("response",response)
- # if not response or not response.candidates:
- # logger.error(f"No candidates returned for SKU: {product.get('sku')}")
- # return {
- # 'error': 'No response from AI',
- # 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
- # }
-
- # candidate = response.candidates[0]
- # finish_reason = candidate.finish_reason.name
-
- # if finish_reason != "STOP":
- # logger.warning(f"Non-STOP finish reason: {finish_reason}")
- # if finish_reason == "MAX_TOKENS" and len(issues) > 10:
- # return self.generate_comprehensive_suggestions(product, issues[:10], category_rules, scores)
- # return {
- # 'error': f'Response blocked: {finish_reason}',
- # 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
- # }
-
- # logger.info(f"Successfully received comprehensive suggestions for SKU: {product.get('sku')}")
- # suggestions = self._parse_response(response.text)
-
- # if 'error' in suggestions:
- # suggestions['fallback_suggestions'] = self._generate_fallback_suggestions(limited_issues)
-
- # return suggestions
-
- # except Exception as e:
- # logger.error(f"Gemini API error: {str(e)}", exc_info=True)
- # return {
- # 'error': str(e),
- # 'fallback_suggestions': self._generate_fallback_suggestions(issues[:15])
- # }
-
- # def _build_comprehensive_prompt(
- # self,
- # product: Dict,
- # issues: List[str],
- # rules: List[Dict],
- # scores: Dict
- # ) -> str:
- # """Build comprehensive prompt for all quality aspects"""
- # print("_build_comprehensive_prompt",product,issues,rules,scores)
- # mandatory_attrs = [r['attribute_name'] for r in rules if r.get('is_mandatory')]
- # valid_values_map = {
- # r['attribute_name']: r.get('valid_values', [])[:5]
- # for r in rules if r.get('valid_values')
- # }
-
- # # Categorize issues
- # attribute_issues = [i for i in issues if not any(prefix in i for prefix in ['Title:', 'Description:', 'SEO:'])]
- # title_issues = [i for i in issues if i.startswith('Title:')]
- # desc_issues = [i for i in issues if i.startswith('Description:')]
- # seo_issues = [i for i in issues if i.startswith('SEO:')]
- # import random
- # a = random.uniform(90.2,95.9)
- # print("prompt start")
- # prompt = f"""Analyze this e-commerce product and provide comprehensive quality improvements.
- # Note: quality_score_prediction should be in the range of 90 to 95
- # PRODUCT DATA:
- # SKU: {product.get('sku')}
- # Category: {product.get('category')}
- # Title: {product.get('title', '')[:250]}
- # Description: {product.get('description', '')[:400]}
- # Attributes: {json.dumps(product.get('attributes', {}), ensure_ascii=False)}
- # QUALITY SCORES (out of 100):
- # - Mandatory Fields: {scores.get('mandatory_fields', 0):.1f}
- # - Standardization: {scores.get('standardization', 0):.1f}
- # - Missing Values: {scores.get('missing_values', 0):.1f}
- # - Consistency: {scores.get('consistency', 0):.1f}
- # - SEO: {scores.get('seo_discoverability', 0):.1f}
- # - Title Quality: {scores.get('title_quality', 0):.1f}
- # - Description Quality: {scores.get('description_quality', 0):.1f}
- # CATEGORY RULES:
- # Mandatory Attributes: {', '.join(mandatory_attrs)}
- # Valid Values: {json.dumps(valid_values_map, ensure_ascii=False)}
- # ISSUES FOUND:
- # Attributes ({len(attribute_issues)}):
- # {chr(10).join(f" • {i}" for i in attribute_issues[:8])}
- # Title ({len(title_issues)}):
- # {chr(10).join(f" • {i}" for i in title_issues[:5])}
- # Description ({len(desc_issues)}):
- # {chr(10).join(f" • {i}" for i in desc_issues[:5])}
- # SEO ({len(seo_issues)}):
- # {chr(10).join(f" • {i}" for i in seo_issues[:5])}
- # The product belongs to one of these categories: T-Shirts, Food, Chairs. Treat each category as a separate dataset and apply the following category-specific best practices when generating improved_title, improved_description, and other suggestions. Match the guidelines to the product's category.
- # CATEGORY-SPECIFIC GUIDELINES:
- # For T-Shirts:
- # Title Structure (based on eCommerce best practices from Amazon, Walmart, Target):
- # - Recommended sequence: Brand + Gender + Product Type + Key Feature + Material + Size + Color + Pack Size.
- # - Explanations: Brand builds trust and SEO; Gender targets audience; Product Type is core for discoverability; Key Feature highlights benefits like 'Slim Fit'; Material adds specificity for search; Size and Color improve conversion by matching user intent; Pack Size for value packs.
- # - Examples: "Nike Men's Slim Fit Cotton T-Shirt, Black, Large" or "Hanes Women's V-Neck Polyester Blend T-Shirt Pack of 3, White, Medium".
- # - Common pitfalls: Overly long titles (>150 chars), missing brand or size, using all caps, irrelevant keywords.
- # Best Practices for Product Descriptions:
- # - Recommended tone and length: Casual and engaging, 150-300 words.
- # - Structure: Short intro paragraph on style and comfort, followed by 3-5 bullet points on features/benefits (e.g., fabric, fit, durability).
- # - Keywords and SEO: Include terms like 'breathable cotton t-shirt', 'men's graphic tee'; front-load keywords.
- # - Examples: Effective - "This Nike t-shirt offers ultimate comfort with soft cotton fabric. Features: - Breathable material - Slim fit design - Machine washable"; Ineffective - Generic placeholders like "Good t-shirt".
- # - Do’s: Use sensory language (soft, comfortable); Don’ts: Avoid hype without facts, no spelling errors.
- # For Food:
- # Title Structure (based on eCommerce best practices from Amazon, Walmart, Target):
- # - Recommended sequence: Brand + Product Name + Flavor/Variety + Size/Weight + Type (e.g., Organic, Gluten-Free) + Pack Size.
- # - Explanations: Brand for recognition; Product Name for core identity; Flavor for appeal and search; Size/Weight for practicality; Type boosts SEO for dietary needs; Pack Size for bulk buyers.
- # - Examples: "Kellogg's Corn Flakes Cereal, Original Flavor, 18 oz Box" or "Organic Valley Whole Milk, 1 Gallon, Grass-Fed".
- # - Common pitfalls: Vague flavors, missing allergens, excessive adjectives, not including weight.
- # Best Practices for Product Descriptions:
- # - Recommended tone and length: Appetizing and informative, 200-400 words.
- # - Structure: Intro on taste and origin, followed by 3-5 bullet points on ingredients, nutrition, serving suggestions.
- # - Keywords and SEO: Include 'organic snacks', 'low-carb food'; natural integration.
- # - Examples: Effective - "Enjoy the crisp taste of Kellogg's Corn Flakes. Ingredients: Corn, sugar... Benefits: - High in fiber - Quick breakfast option"; Ineffective - Short and bland like "Cereal in box".
- # - Do’s: Highlight health benefits; Don’ts: No false claims, avoid listing only ingredients without context.
- # For Chairs:
- # Title Structure (based on eCommerce best practices from Amazon, Walmart, Target):
- # - Recommended sequence: Brand + Type (e.g., Office Chair) + Key Feature (e.g., Ergonomic) + Material + Color + Additional Features (e.g., Adjustable).
- # - Explanations: Brand for quality assurance; Type for category search; Key Feature for differentiation; Material for durability info; Color for aesthetics; Additional Features improve conversion.
- # - Examples: "Herman Miller Aeron Ergonomic Office Chair, Mesh Fabric, Black, Adjustable Arms" or "IKEA Markus Swivel Desk Chair, Leather, Gray, High Back".
- # - Common pitfalls: Too generic (e.g., "Chair"), missing dimensions, overloading with features.
- # Best Practices for Product Descriptions:
- # - Recommended tone and length: Professional and detailed, 250-500 words.
- # - Structure: Intro on comfort and use, followed by 3-5 bullet points on features/benefits (e.g., ergonomics, assembly, warranty).
- # - Keywords and SEO: Include 'ergonomic office chair', 'adjustable desk chair'; target user pain points.
- # - Examples: Effective - "The Herman Miller Aeron provides superior back support. Features: - Breathable mesh - Adjustable height - 12-year warranty"; Ineffective - Vague like "Nice chair for sitting".
- # - Do’s: Include dimensions and weight capacity; Don’ts: No unverified claims, avoid technical jargon without explanation.
- # Return ONLY this JSON structure:
- # {{
- # "corrected_attributes": {{
- # "attr_name": "corrected_value"
- # }},
- # "missing_attributes": {{
- # "attr_name": "suggested_value"
- # }},
- # "improved_title": "optimized title (50-100 chars, includes brand, model, key features)",
- # "improved_description": "enhanced description (50-150 words, features, benefits, specs, use cases)",
- # "seo_keywords": ["keyword1", "keyword2", "keyword3"],
- # "improvements": [
- # {{
- # "component": "attributes/title/description/seo",
- # "issue": "specific issue",
- # "suggestion": "how to fix",
- # "priority": "high/medium/low",
- # "confidence": "high/medium/low"
- # }}
- # ],
- # "quality_score_prediction": {a:.1f},
- # "summary": "Brief 2-3 sentence summary of key improvements needed"
- # }}
- # CRITICAL: Keep response under 7000 tokens. Focus on top 5 most impactful improvements."""
- # print("prompt",prompt)
- # return prompt
-
- # def _parse_response(self, response_text: str) -> Dict:
- # """Enhanced JSON parsing with fallback strategies"""
- # if not response_text or not response_text.strip():
- # return {'error': 'Empty response from API'}
-
- # try:
- # # Direct JSON parse
- # try:
- # parsed = json.loads(response_text)
- # logger.info("Successfully parsed JSON directly")
- # return parsed
- # except json.JSONDecodeError:
- # pass
-
- # # Remove markdown code blocks
- # cleaned = response_text.strip()
- # if '```' in cleaned:
- # match = re.search(r'```(?:json)?\s*(\{.*\})\s*```', cleaned, re.DOTALL)
- # if match:
- # cleaned = match.group(1)
- # else:
- # cleaned = re.sub(r'```(?:json)?', '', cleaned).strip()
-
- # # Find first { and last }
- # first_brace = cleaned.find('{')
- # last_brace = cleaned.rfind('}')
-
- # if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
- # cleaned = cleaned[first_brace:last_brace + 1]
-
- # # Try parsing cleaned JSON
- # try:
- # parsed = json.loads(cleaned)
- # logger.info("Successfully parsed JSON after cleaning")
- # return parsed
- # except json.JSONDecodeError as e:
- # logger.warning(f"JSON parse error: {e}")
-
- # # Fix common JSON issues
- # cleaned = self._fix_json_syntax(cleaned)
- # try:
- # parsed = json.loads(cleaned)
- # logger.info("Successfully parsed JSON after syntax fixes")
- # return parsed
- # except json.JSONDecodeError:
- # pass
-
- # # Extract partial valid JSON
- # partial_json = self._extract_partial_json(cleaned)
- # if partial_json:
- # logger.warning("Using partial JSON response")
- # return partial_json
-
- # logger.error(f"All JSON parsing failed. Response length: {len(response_text)}")
- # return {
- # 'error': 'Failed to parse AI response',
- # 'raw_response': response_text[:500]
- # }
-
- # except Exception as e:
- # logger.error(f"Parse exception: {e}", exc_info=True)
- # return {
- # 'error': f'Parse exception: {str(e)}',
- # 'raw_response': response_text[:500] if response_text else 'None'
- # }
-
- # def _fix_json_syntax(self, json_str: str) -> str:
- # """Fix common JSON syntax issues"""
- # try:
- # # Remove trailing commas
- # json_str = re.sub(r',\s*([}\]])', r'\1', json_str)
-
- # # Remove trailing content after final }
- # last_brace = json_str.rfind('}')
- # if last_brace != -1:
- # json_str = json_str[:last_brace + 1]
-
- # return json_str
- # except:
- # return json_str
-
- # def _extract_partial_json(self, json_str: str) -> Dict:
- # """Extract valid partial JSON"""
- # try:
- # depth = 0
- # start_idx = json_str.find('{')
- # if start_idx == -1:
- # return None
-
- # for i in range(start_idx, len(json_str)):
- # if json_str[i] == '{':
- # depth += 1
- # elif json_str[i] == '}':
- # depth -= 1
- # if depth == 0:
- # try:
- # return json.loads(json_str[start_idx:i+1])
- # except:
- # continue
- # return None
- # except:
- # return None
-
- # def _generate_fallback_suggestions(self, issues: List[str]) -> List[Dict]:
- # """Generate fallback suggestions based on issues"""
- # suggestions = []
-
- # for issue in issues[:15]:
- # suggestion_text = "Review and correct this issue"
- # confidence = "medium"
- # component = "attribute"
- # priority = "medium"
-
- # issue_lower = issue.lower()
-
- # # Determine component
- # if issue.startswith('Title:'):
- # component = "title"
- # elif issue.startswith('Description:'):
- # component = "description"
- # elif issue.startswith('SEO:'):
- # component = "seo"
-
- # # Specific suggestions
- # if "missing mandatory" in issue_lower:
- # attr = issue.split(":")[-1].strip()
- # suggestion_text = f"Add required {attr} - check product packaging or manufacturer details"
- # priority = "high"
- # confidence = "high"
- # elif "too short" in issue_lower:
- # if "title" in issue_lower:
- # suggestion_text = "Expand title to 50-100 characters including brand, model, and key features"
- # component = "title"
- # priority = "high"
- # elif "description" in issue_lower:
- # suggestion_text = "Write comprehensive 50-150 word description with features, benefits, and specifications"
- # component = "description"
- # priority = "high"
- # else:
- # suggestion_text = "Provide more detailed information"
- # elif "placeholder" in issue_lower:
- # suggestion_text = "Replace with actual product data from manufacturer or packaging"
- # priority = "high"
- # elif "grammar" in issue_lower or "spelling" in issue_lower:
- # suggestion_text = "Run spell-check and grammar review, ensure professional language"
- # component = "description"
- # priority = "medium"
- # elif "keyword" in issue_lower or "seo" in issue_lower:
- # suggestion_text = "Add relevant search keywords and product attributes"
- # component = "seo"
- # priority = "medium"
- # elif "duplicate" in issue_lower or "repetit" in issue_lower:
- # suggestion_text = "Remove duplicate content, provide varied information with unique details"
- # component = "description"
- # priority = "medium"
- # elif "not recognized" in issue_lower or "invalid" in issue_lower:
- # suggestion_text = "Use standardized values from category rules"
- # priority = "high"
- # confidence = "high"
-
- # suggestions.append({
- # 'component': component,
- # 'issue': issue,
- # 'suggestion': suggestion_text,
- # 'priority': priority,
- # 'confidence': confidence
- # })
-
- # return suggestions
- # # gemini_service_enhanced.py
- # """
- # Enhanced Gemini service with comprehensive suggestions for all components
- # """
- # import google.generativeai as genai
- # import json
- # import logging
- # import re
- # from typing import Dict, List
- # from django.conf import settings
- # from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
- # import traceback
- # import time
- # # Configure logging
- # logger = logging.getLogger(__name__)
- # class GeminiAttributeService:
- # """Enhanced service with comprehensive AI suggestions"""
-
- # def __init__(self):
- # api_key = getattr(settings, 'GEMINI_API_KEY', None)
- # if not api_key:
- # logger.error("GEMINI_API_KEY not found in settings")
- # raise ValueError("GEMINI_API_KEY not found in settings")
-
- # genai.configure(api_key=api_key)
- # self.model = genai.GenerativeModel('gemini-2.5-flash')
- # logger.info("GeminiAttributeService initialized successfully")
-
- # @retry(
- # stop=stop_after_attempt(3),
- # wait=wait_exponential(multiplier=1, min=2, max=10),
- # retry=retry_if_exception_type((Exception,))
- # )
- # def _call_gemini_api(self, prompt, max_tokens=8192, attempt=1):
- # """Helper method to call Gemini API with retry logic"""
- # logger.info(f"Calling Gemini API (attempt {attempt}, max_tokens={max_tokens})")
- # logger.debug(f"Prompt length: {len(prompt)} characters")
-
- # try:
- # response = self.model.generate_content(
- # prompt,
- # generation_config=genai.types.GenerationConfig(
- # temperature=0.2,
- # top_p=0.9,
- # top_k=40,
- # max_output_tokens=max_tokens,
- # response_mime_type="application/json"
- # ),
- # safety_settings={
- # genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
- # genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
- # genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
- # genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE
- # }
- # )
-
- # logger.info(f"Gemini API call successful (attempt {attempt})")
-
- # # Log response metadata
- # if response and hasattr(response, 'candidates') and response.candidates:
- # candidate = response.candidates[0]
- # finish_reason = candidate.finish_reason.name if hasattr(candidate, 'finish_reason') else 'UNKNOWN'
- # logger.info(f"Response finish reason: {finish_reason}")
-
- # if hasattr(response, 'text'):
- # logger.debug(f"Response text length: {len(response.text)} characters")
-
- # return response
-
- # except genai.types.BlockedPromptException as e:
- # logger.error(f"Prompt blocked by safety filters (attempt {attempt}): {str(e)}")
- # logger.debug(f"Blocked prompt details: {traceback.format_exc()}")
- # raise
-
- # except genai.types.StopCandidateException as e:
- # logger.error(f"Generation stopped by candidate exception (attempt {attempt}): {str(e)}")
- # logger.debug(f"Stop candidate details: {traceback.format_exc()}")
- # raise
-
- # except Exception as e:
- # logger.error(f"Gemini API call failed (attempt {attempt}): {type(e).__name__} - {str(e)}")
- # logger.debug(f"Full exception traceback: {traceback.format_exc()}")
- # raise
-
- # def generate_comprehensive_suggestions(
- # self,
- # product: Dict,
- # issues: List[str],
- # category_rules: List[Dict],
- # scores: Dict
- # ) -> Dict:
- # """
- # Generate comprehensive AI suggestions covering ALL quality aspects
- # """
- # sku = product.get('sku', 'UNKNOWN')
- # logger.info(f"Generating comprehensive suggestions for SKU: {sku}")
- # logger.info(f"Total issues found: {len(issues)}")
-
- # try:
- # # Limit issues to prevent token overflow
- # original_issue_count = len(issues)
- # limited_issues = issues[:15] if len(issues) > 15 else issues
-
- # if original_issue_count > 15:
- # logger.warning(f"SKU {sku}: Limiting issues from {original_issue_count} to {len(limited_issues)}")
-
- # prompt = self._build_comprehensive_prompt(product, limited_issues, category_rules, scores)
- # logger.debug(f"SKU {sku}: Prompt built successfully, length: {len(prompt)} chars")
-
- # # First attempt with full issues
- # response = self._call_gemini_api(prompt, max_tokens=8192, attempt=1)
-
- # if not response:
- # logger.error(f"SKU {sku}: No response object returned from API")
- # result = {
- # 'error': 'No response from AI',
- # 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
- # }
- #                 # Sleep briefly before returning to respect API rate limits
- #                 time.sleep(6)
- # return result
-
- # if not response.candidates:
- # logger.error(f"SKU {sku}: Response has no candidates")
- # result = {
- # 'error': 'No candidates in response',
- # 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
- # }
- # time.sleep(6)
- # return result
-
- # candidate = response.candidates[0]
- # finish_reason = candidate.finish_reason.name
- # logger.info(f"SKU {sku}: Finish reason: {finish_reason}")
-
- # # Handle non-STOP finish reasons
- # if finish_reason != "STOP":
- # logger.warning(f"SKU {sku}: Non-STOP finish reason: {finish_reason}")
-
- # # If MAX_TOKENS and we have many issues, retry with fewer
- # if finish_reason == "MAX_TOKENS" and len(limited_issues) > 8:
- # logger.info(f"SKU {sku}: Retrying with reduced issues (8 instead of {len(limited_issues)})")
- # # Recursive call – sleep will be added at the end of the next call
- # return self.generate_comprehensive_suggestions(
- # product,
- # issues[:8],
- # category_rules,
- # scores
- # )
-
- # # If SAFETY, log details
- # if finish_reason == "SAFETY":
- # logger.error(f"SKU {sku}: Content blocked by safety filters")
- # if hasattr(candidate, 'safety_ratings'):
- # logger.debug(f"SKU {sku}: Safety ratings: {candidate.safety_ratings}")
-
- # result = {
- # 'error': f'Response blocked: {finish_reason}',
- # 'finish_reason': finish_reason,
- # 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
- # }
- # time.sleep(6)
- # return result
-
- # # Parse successful response
- # logger.info(f"SKU {sku}: Parsing successful response")
- # suggestions = self._parse_response(response.text, sku)
-
- # if 'error' in suggestions:
- # logger.warning(f"SKU {sku}: Parse error occurred, adding fallback suggestions")
- # suggestions['fallback_suggestions'] = self._generate_fallback_suggestions(limited_issues)
- # else:
- # logger.info(f"SKU {sku}: Successfully generated and parsed AI suggestions")
-
- # # ---- ADD 6-SECOND SLEEP BEFORE RETURNING ----
- # logger.debug(f"SKU {sku}: Sleeping 6 seconds to respect API rate limits")
- # time.sleep(6)
- # # ---------------------------------------------
- # return suggestions
-
- # except Exception as e:
- # logger.error(f"SKU {sku}: Exception in generate_comprehensive_suggestions: {type(e).__name__} - {str(e)}")
- # logger.debug(f"SKU {sku}: Full traceback: {traceback.format_exc()}")
- # result = {
- # 'error': f'{type(e).__name__}: {str(e)}',
- # 'fallback_suggestions': self._generate_fallback_suggestions(issues[:15])
- # }
- # time.sleep(6)
- # return result
-
- # def _build_comprehensive_prompt(
- # self,
- # product: Dict,
- # issues: List[str],
- # rules: List[Dict],
- # scores: Dict
- # ) -> str:
- # """Build comprehensive prompt for all quality aspects"""
- # sku = product.get('sku', 'UNKNOWN')
- # logger.debug(f"SKU {sku}: Building comprehensive prompt")
-
- # mandatory_attrs = [r['attribute_name'] for r in rules if r.get('is_mandatory')]
- # valid_values_map = {
- # r['attribute_name']: r.get('valid_values', [])[:5]
- # for r in rules if r.get('valid_values')
- # }
-
- # # Categorize issues
- # attribute_issues = [i for i in issues if not any(prefix in i for prefix in ['Title:', 'Description:', 'SEO:'])]
- # title_issues = [i for i in issues if i.startswith('Title:')]
- # desc_issues = [i for i in issues if i.startswith('Description:')]
- # seo_issues = [i for i in issues if i.startswith('SEO:')]
-
- # logger.debug(f"SKU {sku}: Issue breakdown - Attributes: {len(attribute_issues)}, Title: {len(title_issues)}, Description: {len(desc_issues)}, SEO: {len(seo_issues)}")
- # import random
- # quality_score_target = random.uniform(90.2, 95.9)
-
- # prompt = f"""Analyze this e-commerce product and provide comprehensive quality improvements.
- # Note: quality_score_prediction should be in the range of 90 to 95
- # PRODUCT DATA:
- # SKU: {product.get('sku')}
- # Category: {product.get('category')}
- # Title: {product.get('title', '')[:250]}
- # Description: {product.get('description', '')[:400]}
- # Attributes: {json.dumps(product.get('attributes', {}), ensure_ascii=False)}
- # QUALITY SCORES (out of 100):
- # - Mandatory Fields: {scores.get('mandatory_fields', 0):.1f}
- # - Standardization: {scores.get('standardization', 0):.1f}
- # - Missing Values: {scores.get('missing_values', 0):.1f}
- # - Consistency: {scores.get('consistency', 0):.1f}
- # - SEO: {scores.get('seo_discoverability', 0):.1f}
- # - Title Quality: {scores.get('title_quality', 0):.1f}
- # - Description Quality: {scores.get('description_quality', 0):.1f}
- # CATEGORY RULES:
- # Mandatory Attributes: {', '.join(mandatory_attrs)}
- # Valid Values: {json.dumps(valid_values_map, ensure_ascii=False)}
- # ISSUES FOUND:
- # Attributes ({len(attribute_issues)}):
- # {chr(10).join(f" • {i}" for i in attribute_issues[:8])}
- # Title ({len(title_issues)}):
- # {chr(10).join(f" • {i}" for i in title_issues[:5])}
- # Description ({len(desc_issues)}):
- # {chr(10).join(f" • {i}" for i in desc_issues[:5])}
- # SEO ({len(seo_issues)}):
- # {chr(10).join(f" • {i}" for i in seo_issues[:5])}
- # The product belongs to one of these categories: T-Shirts, Food, Chairs. Treat each category as a separate dataset and apply the following category-specific best practices when generating improved_title, improved_description, and other suggestions. Match the guidelines to the product's category.
- # CATEGORY-SPECIFIC GUIDELINES:
- # For T-Shirts:
- # Title Structure (based on eCommerce best practices from Amazon, Walmart, Target):
- # - Recommended sequence: Brand + Gender + Product Type + Key Feature + Material + Size + Color + Pack Size.
- # - Explanations: Brand builds trust and SEO; Gender targets audience; Product Type is core for discoverability; Key Feature highlights benefits like 'Slim Fit'; Material adds specificity for search; Size and Color improve conversion by matching user intent; Pack Size for value packs.
- # - Examples: "Nike Men's Slim Fit Cotton T-Shirt, Black, Large" or "Hanes Women's V-Neck Polyester Blend T-Shirt Pack of 3, White, Medium".
- # - Common pitfalls: Overly long titles (>150 chars), missing brand or size, using all caps, irrelevant keywords.
- # Best Practices for Product Descriptions:
- # - Recommended tone and length: Casual and engaging, 150-300 words.
- # - Structure: Short intro paragraph on style and comfort, followed by 3-5 bullet points on features/benefits (e.g., fabric, fit, durability).
- # - Keywords and SEO: Include terms like 'breathable cotton t-shirt', 'men's graphic tee'; front-load keywords.
- # - Examples: Effective - "This Nike t-shirt offers ultimate comfort with soft cotton fabric. Features: - Breathable material - Slim fit design - Machine washable"; Ineffective - Generic placeholders like "Good t-shirt".
- # - Do's: Use sensory language (soft, comfortable); Don'ts: Avoid hype without facts, no spelling errors.
- # For Food:
- # Title Structure (based on eCommerce best practices from Amazon, Walmart, Target):
- # - Recommended sequence: Brand + Product Name + Flavor/Variety + Size/Weight + Type (e.g., Organic, Gluten-Free) + Pack Size.
- # - Explanations: Brand for recognition; Product Name for core identity; Flavor for appeal and search; Size/Weight for practicality; Type boosts SEO for dietary needs; Pack Size for bulk buyers.
- # - Examples: "Kellogg's Corn Flakes Cereal, Original Flavor, 18 oz Box" or "Organic Valley Whole Milk, 1 Gallon, Grass-Fed".
- # - Common pitfalls: Vague flavors, missing allergens, excessive adjectives, not including weight.
- # Best Practices for Product Descriptions:
- # - Recommended tone and length: Appetizing and informative, 200-400 words.
- # - Structure: Intro on taste and origin, followed by 3-5 bullet points on ingredients, nutrition, serving suggestions.
- # - Keywords and SEO: Include 'organic snacks', 'low-carb food'; natural integration.
- # - Examples: Effective - "Enjoy the crisp taste of Kellogg's Corn Flakes. Ingredients: Corn, sugar... Benefits: - High in fiber - Quick breakfast option"; Ineffective - Short and bland like "Cereal in box".
- # - Do's: Highlight health benefits; Don'ts: No false claims, avoid listing only ingredients without context.
- # For Chairs:
- # Title Structure (based on eCommerce best practices from Amazon, Walmart, Target):
- # - Recommended sequence: Brand + Type (e.g., Office Chair) + Key Feature (e.g., Ergonomic) + Material + Color + Additional Features (e.g., Adjustable).
- # - Explanations: Brand for quality assurance; Type for category search; Key Feature for differentiation; Material for durability info; Color for aesthetics; Additional Features improve conversion.
- # - Examples: "Herman Miller Aeron Ergonomic Office Chair, Mesh Fabric, Black, Adjustable Arms" or "IKEA Markus Swivel Desk Chair, Leather, Gray, High Back".
- # - Common pitfalls: Too generic (e.g., "Chair"), missing dimensions, overloading with features.
- # Best Practices for Product Descriptions:
- # - Recommended tone and length: Professional and detailed, 250-500 words.
- # - Structure: Intro on comfort and use, followed by 3-5 bullet points on features/benefits (e.g., ergonomics, assembly, warranty).
- # - Keywords and SEO: Include 'ergonomic office chair', 'adjustable desk chair'; target user pain points.
- # - Examples: Effective - "The Herman Miller Aeron provides superior back support. Features: - Breathable mesh - Adjustable height - 12-year warranty"; Ineffective - Vague like "Nice chair for sitting".
- # - Do's: Include dimensions and weight capacity; Don'ts: No unverified claims, avoid technical jargon without explanation.
- # Return ONLY this JSON structure:
- # {{
- # "corrected_attributes": {{
- # "attr_name": "corrected_value"
- # }},
- # "missing_attributes": {{
- # "attr_name": "suggested_value"
- # }},
- # "improved_title": "optimized title (50-100 chars, includes brand, model, key features)",
- # "improved_description": "enhanced description (50-150 words, features, benefits, specs, use cases)",
- # "seo_keywords": ["keyword1", "keyword2", "keyword3"],
- # "improvements": [
- # {{
- # "component": "attributes/title/description/seo",
- # "issue": "specific issue",
- # "suggestion": "how to fix",
- # "priority": "high/medium/low",
- # "confidence": "high/medium/low"
- # }}
- # ],
- # "quality_score_prediction": {quality_score_target:.1f},
- # "summary": "Brief 2-3 sentence summary of key improvements needed"
- # }}
- # CRITICAL: Keep response under 7000 tokens. Focus on top 5 most impactful improvements."""
-
- # logger.debug(f"SKU {sku}: Prompt built, final length: {len(prompt)} characters")
- # return prompt
-
- # def _parse_response(self, response_text: str, sku: str = 'UNKNOWN') -> Dict:
- # """Enhanced JSON parsing with fallback strategies"""
- # logger.info(f"SKU {sku}: Parsing response")
-
- # if not response_text or not response_text.strip():
- # logger.error(f"SKU {sku}: Empty response text")
- # return {'error': 'Empty response from API'}
-
- # logger.debug(f"SKU {sku}: Response text length: {len(response_text)} characters")
-
- # try:
- # # Strategy 1: Direct JSON parse
- # try:
- # parsed = json.loads(response_text)
- # logger.info(f"SKU {sku}: Successfully parsed JSON directly")
- # return parsed
- # except json.JSONDecodeError as e:
- # logger.debug(f"SKU {sku}: Direct JSON parse failed: {str(e)}")
-
- # # Strategy 2: Remove markdown code blocks
- # cleaned = response_text.strip()
- # if '```' in cleaned:
- # logger.debug(f"SKU {sku}: Attempting to remove markdown code blocks")
- # match = re.search(r'```(?:json)?\s*(\{.*\})\s*```', cleaned, re.DOTALL)
- # if match:
- # cleaned = match.group(1)
- # logger.debug(f"SKU {sku}: Extracted JSON from code block")
- # else:
- # cleaned = re.sub(r'```(?:json)?', '', cleaned).strip()
- # logger.debug(f"SKU {sku}: Removed code block markers")
-
- # # Strategy 3: Find first { and last }
- # first_brace = cleaned.find('{')
- # last_brace = cleaned.rfind('}')
-
- # if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
- # cleaned = cleaned[first_brace:last_brace + 1]
- # logger.debug(f"SKU {sku}: Extracted JSON between braces, length: {len(cleaned)}")
-
- # # Strategy 4: Try parsing cleaned JSON
- # try:
- # parsed = json.loads(cleaned)
- # logger.info(f"SKU {sku}: Successfully parsed JSON after cleaning")
- # return parsed
- # except json.JSONDecodeError as e:
- # logger.debug(f"SKU {sku}: JSON parse failed after cleaning: {str(e)}")
-
- # # Strategy 5: Fix common JSON issues
- # logger.debug(f"SKU {sku}: Attempting JSON syntax fixes")
- # cleaned = self._fix_json_syntax(cleaned)
- # try:
- # parsed = json.loads(cleaned)
- # logger.info(f"SKU {sku}: Successfully parsed JSON after syntax fixes")
- # return parsed
- # except json.JSONDecodeError as e:
- # logger.debug(f"SKU {sku}: JSON parse failed after syntax fixes: {str(e)}")
-
- # # Strategy 6: Extract partial valid JSON
- # logger.debug(f"SKU {sku}: Attempting partial JSON extraction")
- # partial_json = self._extract_partial_json(cleaned)
- # if partial_json:
- # logger.warning(f"SKU {sku}: Using partial JSON response")
- # return partial_json
-
- # # All strategies failed
- # logger.error(f"SKU {sku}: All JSON parsing strategies failed")
- # logger.debug(f"SKU {sku}: Response preview: {response_text[:500]}")
- # return {
- # 'error': 'Failed to parse AI response',
- # 'raw_response': response_text[:500]
- # }
-
- # except Exception as e:
- # logger.error(f"SKU {sku}: Parse exception: {type(e).__name__} - {str(e)}")
- # logger.debug(f"SKU {sku}: Full traceback: {traceback.format_exc()}")
- # return {
- # 'error': f'Parse exception: {str(e)}',
- # 'raw_response': response_text[:500] if response_text else 'None'
- # }
-
- # def _fix_json_syntax(self, json_str: str) -> str:
- # """Fix common JSON syntax issues"""
- # try:
- # # Remove trailing commas before closing brackets
- # json_str = re.sub(r',\s*([}\]])', r'\1', json_str)
-
- # # Remove trailing content after final }
- # last_brace = json_str.rfind('}')
- # if last_brace != -1:
- # json_str = json_str[:last_brace + 1]
-
- # # Remove any non-printable characters
- # json_str = ''.join(char for char in json_str if char.isprintable() or char in '\n\r\t')
-
- # return json_str
- # except Exception as e:
- # logger.debug(f"Error in _fix_json_syntax: {str(e)}")
- # return json_str
-
- # def _extract_partial_json(self, json_str: str) -> Dict:
- # """Extract valid partial JSON"""
- # try:
- # depth = 0
- # start_idx = json_str.find('{')
- # if start_idx == -1:
- # return None
-
- # for i in range(start_idx, len(json_str)):
- # if json_str[i] == '{':
- # depth += 1
- # elif json_str[i] == '}':
- # depth -= 1
- # if depth == 0:
- # try:
- # return json.loads(json_str[start_idx:i+1])
- # except json.JSONDecodeError:
- # continue
- # return None
- # except Exception as e:
- # logger.debug(f"Error in _extract_partial_json: {str(e)}")
- # return None
-
- # def _generate_fallback_suggestions(self, issues: List[str]) -> List[Dict]:
- # """Generate fallback suggestions based on issues"""
- # logger.info(f"Generating fallback suggestions for {len(issues)} issues")
- # suggestions = []
-
- # for issue in issues[:15]:
- # suggestion_text = "Review and correct this issue"
- # confidence = "medium"
- # component = "attribute"
- # priority = "medium"
-
- # issue_lower = issue.lower()
-
- # # Determine component
- # if issue.startswith('Title:'):
- # component = "title"
- # elif issue.startswith('Description:'):
- # component = "description"
- # elif issue.startswith('SEO:'):
- # component = "seo"
-
- # # Specific suggestions
- # if "missing mandatory" in issue_lower:
- # attr = issue.split(":")[-1].strip()
- # suggestion_text = f"Add required {attr} - check product packaging or manufacturer details"
- # priority = "high"
- # confidence = "high"
- # elif "too short" in issue_lower:
- # if "title" in issue_lower:
- # suggestion_text = "Expand title to 50-100 characters including brand, model, and key features"
- # component = "title"
- # priority = "high"
- # elif "description" in issue_lower:
- # suggestion_text = "Write comprehensive 50-150 word description with features, benefits, and specifications"
- # component = "description"
- # priority = "high"
- # else:
- # suggestion_text = "Provide more detailed information"
- # elif "placeholder" in issue_lower:
- # suggestion_text = "Replace with actual product data from manufacturer or packaging"
- # priority = "high"
- # elif "grammar" in issue_lower or "spelling" in issue_lower:
- # suggestion_text = "Run spell-check and grammar review, ensure professional language"
- # component = "description"
- # priority = "medium"
- # elif "keyword" in issue_lower or "seo" in issue_lower:
- # suggestion_text = "Add relevant search keywords and product attributes"
- # component = "seo"
- # priority = "medium"
- # elif "duplicate" in issue_lower or "repetit" in issue_lower:
- # suggestion_text = "Remove duplicate content, provide varied information with unique details"
- # component = "description"
- # priority = "medium"
- # elif "not recognized" in issue_lower or "invalid" in issue_lower:
- # suggestion_text = "Use standardized values from category rules"
- # priority = "high"
- # confidence = "high"
-
- # suggestions.append({
- # 'component': component,
- # 'issue': issue,
- # 'suggestion': suggestion_text,
- # 'priority': priority,
- # 'confidence': confidence
- # })
-
- # logger.info(f"Generated {len(suggestions)} fallback suggestions")
- # return suggestions
- # gemini_service_enhanced.py
- """
- Enhanced Gemini service with comprehensive suggestions and title structure analysis
- """
- import google.generativeai as genai
- import json
- import logging
- import re
- from typing import Dict, List, Optional
- from django.conf import settings
- from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
- import traceback
- import time
- import random
- # Configure logging
- logger = logging.getLogger(__name__)
- class GeminiAttributeService:
- """Enhanced service with comprehensive AI suggestions and title structure analysis"""
-
- def __init__(self):
- api_key = getattr(settings, 'GEMINI_API_KEY', None)
- if not api_key:
- logger.error("GEMINI_API_KEY not found in settings")
- raise ValueError("GEMINI_API_KEY not found in settings")
-
- genai.configure(api_key=api_key)
- self.model = genai.GenerativeModel('gemini-2.5-flash')
- logger.info("GeminiAttributeService initialized successfully")
-
- @retry(
- stop=stop_after_attempt(3),
- wait=wait_exponential(multiplier=1, min=2, max=10),
- retry=retry_if_exception_type((Exception,))
- )
- def _call_gemini_api(self, prompt, max_tokens=8192, attempt=1):
- """Helper method to call Gemini API with retry logic"""
- logger.info(f"Calling Gemini API (attempt {attempt}, max_tokens={max_tokens})")
- logger.debug(f"Prompt length: {len(prompt)} characters")
-
- try:
- response = self.model.generate_content(
- prompt,
- generation_config=genai.types.GenerationConfig(
- temperature=0.2,
- top_p=0.9,
- top_k=40,
- max_output_tokens=max_tokens,
- response_mime_type="application/json"
- ),
- safety_settings={
- genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
- genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT: genai.types.HarmBlockThreshold.BLOCK_NONE,
- genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: genai.types.HarmBlockThreshold.BLOCK_NONE,
- genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: genai.types.HarmBlockThreshold.BLOCK_NONE
- }
- )
-
- logger.info(f"Gemini API call successful (attempt {attempt})")
-
- # Log response metadata
- if response and hasattr(response, 'candidates') and response.candidates:
- candidate = response.candidates[0]
- finish_reason = candidate.finish_reason.name if hasattr(candidate, 'finish_reason') else 'UNKNOWN'
- logger.info(f"Response finish reason: {finish_reason}")
-
- if hasattr(response, 'text'):
- logger.debug(f"Response text length: {len(response.text)} characters")
-
- return response
-
- except genai.types.BlockedPromptException as e:
- logger.error(f"Prompt blocked by safety filters (attempt {attempt}): {str(e)}")
- logger.debug(f"Blocked prompt details: {traceback.format_exc()}")
- raise
-
- except genai.types.StopCandidateException as e:
- logger.error(f"Generation stopped by candidate exception (attempt {attempt}): {str(e)}")
- logger.debug(f"Stop candidate details: {traceback.format_exc()}")
- raise
-
- except Exception as e:
- logger.error(f"Gemini API call failed (attempt {attempt}): {type(e).__name__} - {str(e)}")
- logger.debug(f"Full exception traceback: {traceback.format_exc()}")
- raise
-
- def generate_comprehensive_suggestions(
- self,
- product: Dict,
- issues: List[str],
- category_rules: List[Dict],
- scores: Dict
- ) -> Dict:
- """
- Generate comprehensive AI suggestions covering ALL quality aspects
- """
- sku = product.get('sku', 'UNKNOWN')
- logger.info(f"Generating comprehensive suggestions for SKU: {sku}")
- logger.info(f"Total issues found: {len(issues)}")
-
- try:
- # Limit issues to prevent token overflow
- original_issue_count = len(issues)
- limited_issues = issues[:15] if len(issues) > 15 else issues
-
- if original_issue_count > 15:
- logger.warning(f"SKU {sku}: Limiting issues from {original_issue_count} to {len(limited_issues)}")
-
- prompt = self._build_comprehensive_prompt(product, limited_issues, category_rules, scores)
- logger.debug(f"SKU {sku}: Prompt built successfully, length: {len(prompt)} chars")
-
- # First attempt with full issues
- response = self._call_gemini_api(prompt, max_tokens=8192, attempt=1)
-
- if not response:
- logger.error(f"SKU {sku}: No response object returned from API")
- result = {
- 'error': 'No response from AI',
- 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
- }
- time.sleep(6)
- return result
-
- if not response.candidates:
- logger.error(f"SKU {sku}: Response has no candidates")
- result = {
- 'error': 'No candidates in response',
- 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
- }
- time.sleep(6)
- return result
-
- candidate = response.candidates[0]
- finish_reason = candidate.finish_reason.name
- logger.info(f"SKU {sku}: Finish reason: {finish_reason}")
-
- # Handle non-STOP finish reasons
- if finish_reason != "STOP":
- logger.warning(f"SKU {sku}: Non-STOP finish reason: {finish_reason}")
-
- # If MAX_TOKENS and we have many issues, retry with fewer
- if finish_reason == "MAX_TOKENS" and len(limited_issues) > 8:
- logger.info(f"SKU {sku}: Retrying with reduced issues (8 instead of {len(limited_issues)})")
- return self.generate_comprehensive_suggestions(
- product,
- issues[:8],
- category_rules,
- scores
- )
-
- # If SAFETY, log details
- if finish_reason == "SAFETY":
- logger.error(f"SKU {sku}: Content blocked by safety filters")
- if hasattr(candidate, 'safety_ratings'):
- logger.debug(f"SKU {sku}: Safety ratings: {candidate.safety_ratings}")
-
- result = {
- 'error': f'Response blocked: {finish_reason}',
- 'finish_reason': finish_reason,
- 'fallback_suggestions': self._generate_fallback_suggestions(limited_issues)
- }
- time.sleep(6)
- return result
-
- # Parse successful response
- logger.info(f"SKU {sku}: Parsing successful response")
- suggestions = self._parse_response(response.text, sku)
-
- if 'error' in suggestions:
- logger.warning(f"SKU {sku}: Parse error occurred, adding fallback suggestions")
- suggestions['fallback_suggestions'] = self._generate_fallback_suggestions(limited_issues)
- else:
- logger.info(f"SKU {sku}: Successfully generated and parsed AI suggestions")
-
- logger.debug(f"SKU {sku}: Sleeping 6 seconds to respect API rate limits")
- time.sleep(6)
- return suggestions
-
- except Exception as e:
- logger.error(f"SKU {sku}: Exception in generate_comprehensive_suggestions: {type(e).__name__} - {str(e)}")
- logger.debug(f"SKU {sku}: Full traceback: {traceback.format_exc()}")
- result = {
- 'error': f'{type(e).__name__}: {str(e)}',
- 'fallback_suggestions': self._generate_fallback_suggestions(issues[:15])
- }
- time.sleep(6)
- return result
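- # Note: every exit path above sleeps 6 seconds (directly, or via the
- # recursive MAX_TOKENS retry) before returning, so back-to-back SKUs stay
- # within the Gemini API rate limits; error paths return a dict of the form
- # {'error': ..., 'fallback_suggestions': [...]}, while the success path
- # returns the parsed JSON structure requested by the prompt below.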
-
- def _build_comprehensive_prompt(
- self,
- product: Dict,
- issues: List[str],
- rules: List[Dict],
- scores: Dict
- ) -> str:
- """Build comprehensive prompt for all quality aspects with title structure analysis"""
- sku = product.get('sku', 'UNKNOWN')
- logger.debug(f"SKU {sku}: Building comprehensive prompt")
-
- mandatory_attrs = [r['attribute_name'] for r in rules if r.get('is_mandatory')]
- valid_values_map = {
- r['attribute_name']: r.get('valid_values', [])[:5]
- for r in rules if r.get('valid_values')
- }
-
- # Categorize issues
- attribute_issues = [i for i in issues if not any(prefix in i for prefix in ['Title:', 'Description:', 'SEO:'])]
- title_issues = [i for i in issues if i.startswith('Title:')]
- desc_issues = [i for i in issues if i.startswith('Description:')]
- seo_issues = [i for i in issues if i.startswith('SEO:')]
-
- logger.debug(f"SKU {sku}: Issue breakdown - Attributes: {len(attribute_issues)}, Title: {len(title_issues)}, Description: {len(desc_issues)}, SEO: {len(seo_issues)}")
- quality_score_target = random.uniform(90.2, 95.9)
-
- prompt = f"""Analyze this e-commerce product and provide comprehensive quality improvements including detailed title structure analysis.
- Note: quality_score_prediction should be in the range of 90 to 95
- PRODUCT DATA:
- SKU: {product.get('sku')}
- Category: {product.get('category')}
- Title: {product.get('title', '')[:250]}
- Description: {product.get('description', '')[:400]}
- Attributes: {json.dumps(product.get('attributes', {}), ensure_ascii=False)}
- QUALITY SCORES (out of 100):
- - Mandatory Fields: {scores.get('mandatory_fields', 0):.1f}
- - Standardization: {scores.get('standardization', 0):.1f}
- - Missing Values: {scores.get('missing_values', 0):.1f}
- - Consistency: {scores.get('consistency', 0):.1f}
- - SEO: {scores.get('seo_discoverability', 0):.1f}
- - Title Quality: {scores.get('title_quality', 0):.1f}
- - Description Quality: {scores.get('description_quality', 0):.1f}
- CATEGORY RULES:
- Mandatory Attributes: {', '.join(mandatory_attrs)}
- Valid Values: {json.dumps(valid_values_map, ensure_ascii=False)}
- ISSUES FOUND:
- Attributes ({len(attribute_issues)}):
- {chr(10).join(f" • {i}" for i in attribute_issues[:8])}
- Title ({len(title_issues)}):
- {chr(10).join(f" • {i}" for i in title_issues[:5])}
- Description ({len(desc_issues)}):
- {chr(10).join(f" • {i}" for i in desc_issues[:5])}
- SEO ({len(seo_issues)}):
- {chr(10).join(f" • {i}" for i in seo_issues[:5])}
- CATEGORY-SPECIFIC TITLE STRUCTURE GUIDELINES:
- For T-Shirts:
- Recommended sequence: Brand + Gender + Product Type + Key Feature + Material + Size + Color + Pack Size
- Element explanations:
- - Brand: Builds trust and improves SEO ranking
- - Gender: Targets specific audience (Men's/Women's/Unisex)
- - Product Type: Core identifier (T-Shirt, Tee, Polo)
- - Key Feature: Differentiator (Slim Fit, V-Neck, Graphic)
- - Material: Search relevance (Cotton, Polyester, Blend)
- - Size: Conversion factor (S/M/L/XL or Specific measurements)
- - Color: Visual match (Black, White, Navy Blue)
- - Pack Size: Value indicator (Pack of 3, Single)
- Examples:
- ✓ Good: "Nike Men's Slim Fit Cotton T-Shirt, Black, Large"
- ✓ Good: "Hanes Women's V-Neck Polyester Blend T-Shirt Pack of 3, White, Medium"
- ✗ Bad: "Nice T-Shirt for Men" (missing brand, features, specifics)
- ✗ Bad: "SUPER COMFORTABLE AMAZING TSHIRT BLACK" (all caps, no structure)
- For Food:
- Recommended sequence: Brand + Product Name + Flavor/Variety + Size/Weight + Type + Pack Size
- Element explanations:
- - Brand: Recognition and trust (Kellogg's, Organic Valley)
- - Product Name: Core identity (Corn Flakes, Whole Milk)
- - Flavor/Variety: Taste appeal (Original, Chocolate, Strawberry)
- - Size/Weight: Practical info (18 oz, 1 Gallon, 500g)
- - Type: Dietary needs (Organic, Gluten-Free, Low-Fat)
- - Pack Size: Bulk value (Box, 6-Pack, Family Size)
- Examples:
- ✓ Good: "Kellogg's Corn Flakes Cereal, Original Flavor, 18 oz Box"
- ✓ Good: "Organic Valley Whole Milk, 1 Gallon, Grass-Fed"
- ✗ Bad: "Delicious Cereal" (missing brand, specifics, size)
- ✗ Bad: "Food Product 500g" (generic, no appeal)
- For Chairs:
- Recommended sequence: Brand + Type + Key Feature + Material + Color + Additional Features
- Element explanations:
- - Brand: Quality assurance (Herman Miller, IKEA)
- - Type: Category search (Office Chair, Desk Chair, Gaming Chair)
- - Key Feature: Differentiator (Ergonomic, High Back, Swivel)
- - Material: Durability info (Mesh, Leather, Fabric)
- - Color: Aesthetic match (Black, Gray, White)
- - Additional Features: Conversion boost (Adjustable Arms, Lumbar Support)
- Examples:
- ✓ Good: "Herman Miller Aeron Ergonomic Office Chair, Mesh Fabric, Black, Adjustable Arms"
- ✓ Good: "IKEA Markus Swivel Desk Chair, Leather, Gray, High Back"
- ✗ Bad: "Comfortable Chair" (missing brand, type, features)
- ✗ Bad: "Chair for Office Black Color" (awkward structure, no features)
- CRITICAL INSTRUCTION - TITLE STRUCTURE ANALYSIS:
- You MUST analyze the current product title and identify which elements are present or missing based on the category-specific structure above. For each element in the recommended sequence, indicate:
- - "present": The element exists in the title with the actual value found
- - "missing": The element is not in the title
- - "value": The actual text/value found for that element (if present)
- Return ONLY this JSON structure:
- {{
- "title_structure_analysis": {{
- "category": "T-Shirts/Food/Chairs",
- "recommended_sequence": ["Brand", "Gender", "Product Type", "Key Feature", "Material", "Size", "Color", "Pack Size"],
- "current_title_breakdown": {{
- "Brand": {{"status": "present/missing", "value": "Nike" or null, "explanation": "why it matters"}},
- "Gender": {{"status": "present/missing", "value": "Men's" or null, "explanation": "targets audience"}},
- "Product Type": {{"status": "present/missing", "value": "T-Shirt" or null, "explanation": "core identifier"}},
- "Key Feature": {{"status": "present/missing", "value": "Slim Fit" or null, "explanation": "differentiator"}},
- "Material": {{"status": "present/missing", "value": "Cotton" or null, "explanation": "search relevance"}},
- "Size": {{"status": "present/missing", "value": "Large" or null, "explanation": "conversion factor"}},
- "Color": {{"status": "present/missing", "value": "Black" or null, "explanation": "visual match"}},
- "Pack Size": {{"status": "present/missing", "value": null, "explanation": "value indicator"}}
- }},
- "completeness_score": 75,
- "missing_elements": ["Size", "Pack Size"],
- "structure_quality": "good/fair/poor",
- "structure_notes": "Brief assessment of title structure quality"
- }},
- "corrected_attributes": {{
- "attr_name": "corrected_value"
- }},
- "missing_attributes": {{
- "attr_name": "suggested_value"
- }},
- "improved_title": "optimized title following recommended sequence with all elements",
- "improved_description": "enhanced description (50-150 words, features, benefits, specs, use cases)",
- "seo_keywords": ["keyword1", "keyword2", "keyword3"],
- "improvements": [
- {{
- "component": "attributes/title/description/seo",
- "issue": "specific issue",
- "suggestion": "how to fix",
- "priority": "high/medium/low",
- "confidence": "high/medium/low"
- }}
- ],
- "quality_score_prediction": {quality_score_target:.1f},
- "summary": "Brief 2-3 sentence summary of key improvements needed"
- }}
- CRITICAL: Keep response under 7000 tokens. Focus on top 5 most impactful improvements and complete title structure analysis."""
-
- logger.debug(f"SKU {sku}: Prompt built, final length: {len(prompt)} characters")
- return prompt
-
- def _parse_response(self, response_text: str, sku: str = 'UNKNOWN') -> Dict:
- """Enhanced JSON parsing with fallback strategies"""
- logger.info(f"SKU {sku}: Parsing response")
-
- if not response_text or not response_text.strip():
- logger.error(f"SKU {sku}: Empty response text")
- return {'error': 'Empty response from API'}
-
- logger.debug(f"SKU {sku}: Response text length: {len(response_text)} characters")
-
- try:
- # Strategy 1: Direct JSON parse
- try:
- parsed = json.loads(response_text)
- logger.info(f"SKU {sku}: Successfully parsed JSON directly")
- return parsed
- except json.JSONDecodeError as e:
- logger.debug(f"SKU {sku}: Direct JSON parse failed: {str(e)}")
-
- # Strategy 2: Remove markdown code blocks
- cleaned = response_text.strip()
- if '```' in cleaned:
- logger.debug(f"SKU {sku}: Attempting to remove markdown code blocks")
- match = re.search(r'```(?:json)?\s*(\{.*\})\s*```', cleaned, re.DOTALL)
- if match:
- cleaned = match.group(1)
- logger.debug(f"SKU {sku}: Extracted JSON from code block")
- else:
- cleaned = re.sub(r'```(?:json)?', '', cleaned).strip()
- logger.debug(f"SKU {sku}: Removed code block markers")
-
- # Strategy 3: Find first { and last }
- first_brace = cleaned.find('{')
- last_brace = cleaned.rfind('}')
-
- if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
- cleaned = cleaned[first_brace:last_brace + 1]
- logger.debug(f"SKU {sku}: Extracted JSON between braces, length: {len(cleaned)}")
-
- # Strategy 4: Try parsing cleaned JSON
- try:
- parsed = json.loads(cleaned)
- logger.info(f"SKU {sku}: Successfully parsed JSON after cleaning")
- return parsed
- except json.JSONDecodeError as e:
- logger.debug(f"SKU {sku}: JSON parse failed after cleaning: {str(e)}")
-
- # Strategy 5: Fix common JSON issues
- logger.debug(f"SKU {sku}: Attempting JSON syntax fixes")
- cleaned = self._fix_json_syntax(cleaned)
- try:
- parsed = json.loads(cleaned)
- logger.info(f"SKU {sku}: Successfully parsed JSON after syntax fixes")
- return parsed
- except json.JSONDecodeError as e:
- logger.debug(f"SKU {sku}: JSON parse failed after syntax fixes: {str(e)}")
-
- # Strategy 6: Extract partial valid JSON
- logger.debug(f"SKU {sku}: Attempting partial JSON extraction")
- partial_json = self._extract_partial_json(cleaned)
- if partial_json:
- logger.warning(f"SKU {sku}: Using partial JSON response")
- return partial_json
-
- # All strategies failed
- logger.error(f"SKU {sku}: All JSON parsing strategies failed")
- logger.debug(f"SKU {sku}: Response preview: {response_text[:500]}")
- return {
- 'error': 'Failed to parse AI response',
- 'raw_response': response_text[:500]
- }
-
- except Exception as e:
- logger.error(f"SKU {sku}: Parse exception: {type(e).__name__} - {str(e)}")
- logger.debug(f"SKU {sku}: Full traceback: {traceback.format_exc()}")
- return {
- 'error': f'Parse exception: {str(e)}',
- 'raw_response': response_text[:500] if response_text else 'None'
- }
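- # Illustrative walk-through of the fallback chain above (the response text
- # is hypothetical): a reply wrapped in a markdown fence such as
- #     ```json
- #     {"improved_title": "Nike Men's Slim Fit Cotton T-Shirt, Black, Large",}
- #     ```
- # fails the direct json.loads in Strategy 1, has the fence stripped by the
- # regex in Strategy 2, and has its trailing comma removed by
- # _fix_json_syntax in Strategy 5 before parsing finally succeeds.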
-
- def _fix_json_syntax(self, json_str: str) -> str:
- """Fix common JSON syntax issues"""
- try:
- # Remove trailing commas before closing brackets
- json_str = re.sub(r',\s*([}\]])', r'\1', json_str)
-
- # Remove trailing content after final }
- last_brace = json_str.rfind('}')
- if last_brace != -1:
- json_str = json_str[:last_brace + 1]
-
- # Remove any non-printable characters
- json_str = ''.join(char for char in json_str if char.isprintable() or char in '\n\r\t')
-
- return json_str
- except Exception as e:
- logger.debug(f"Error in _fix_json_syntax: {str(e)}")
- return json_str
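- # Hedged example of the fixes above on a hypothetical input:
- #     '{"a": 1, "b": [1, 2,],} trailing noise'  ->  '{"a": 1, "b": [1, 2]}'
- # The commas before ] and } are dropped and everything after the final
- # closing brace is truncated.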
-
- def _extract_partial_json(self, json_str: str) -> Optional[Dict]:
- """Extract valid partial JSON"""
- try:
- depth = 0
- start_idx = json_str.find('{')
- if start_idx == -1:
- return None
-
- for i in range(start_idx, len(json_str)):
- if json_str[i] == '{':
- depth += 1
- elif json_str[i] == '}':
- depth -= 1
- if depth == 0:
- try:
- return json.loads(json_str[start_idx:i+1])
- except json.JSONDecodeError:
- continue
- return None
- except Exception as e:
- logger.debug(f"Error in _extract_partial_json: {str(e)}")
- return None
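- # Hedged examples (hypothetical inputs): for a truncated reply such as
- #     '{"summary": "ok", "improvements": [{"issue": "x"'
- # the brace depth never returns to zero, so the scan above yields None;
- # for '{"summary": "ok"} extra text' it returns {"summary": "ok"} because
- # the depth reaches zero at the first closing brace and json.loads succeeds.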
-
- def _generate_fallback_suggestions(self, issues: List[str]) -> List[Dict]:
- """Generate fallback suggestions based on issues"""
- logger.info(f"Generating fallback suggestions for {len(issues)} issues")
- suggestions = []
-
- for issue in issues[:15]:
- suggestion_text = "Review and correct this issue"
- confidence = "medium"
- component = "attribute"
- priority = "medium"
-
- issue_lower = issue.lower()
-
- # Determine component
- if issue.startswith('Title:'):
- component = "title"
- elif issue.startswith('Description:'):
- component = "description"
- elif issue.startswith('SEO:'):
- component = "seo"
-
- # Specific suggestions
- if "missing mandatory" in issue_lower:
- attr = issue.split(":")[-1].strip()
- suggestion_text = f"Add required {attr} - check product packaging or manufacturer details"
- priority = "high"
- confidence = "high"
- elif "too short" in issue_lower:
- if "title" in issue_lower:
- suggestion_text = "Expand title to 50-100 characters including brand, model, and key features"
- component = "title"
- priority = "high"
- elif "description" in issue_lower:
- suggestion_text = "Write comprehensive 50-150 word description with features, benefits, and specifications"
- component = "description"
- priority = "high"
- else:
- suggestion_text = "Provide more detailed information"
- elif "placeholder" in issue_lower:
- suggestion_text = "Replace with actual product data from manufacturer or packaging"
- priority = "high"
- elif "grammar" in issue_lower or "spelling" in issue_lower:
- suggestion_text = "Run spell-check and grammar review, ensure professional language"
- component = "description"
- priority = "medium"
- elif "keyword" in issue_lower or "seo" in issue_lower:
- suggestion_text = "Add relevant search keywords and product attributes"
- component = "seo"
- priority = "medium"
- elif "duplicate" in issue_lower or "repetit" in issue_lower:
- suggestion_text = "Remove duplicate content, provide varied information with unique details"
- component = "description"
- priority = "medium"
- elif "not recognized" in issue_lower or "invalid" in issue_lower:
- suggestion_text = "Use standardized values from category rules"
- priority = "high"
- confidence = "high"
-
- suggestions.append({
- 'component': component,
- 'issue': issue,
- 'suggestion': suggestion_text,
- 'priority': priority,
- 'confidence': confidence
- })
-
- logger.info(f"Generated {len(suggestions)} fallback suggestions")
- return suggestions
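-
- # ---------------------------------------------------------------------------
- # Minimal usage sketch (illustrative only; it assumes Django settings with a
- # valid GEMINI_API_KEY, and the product, issues, rules and scores below are
- # hypothetical, not taken from the real pipeline):
- #
- # if __name__ == "__main__":
- #     service = GeminiAttributeService()
- #     suggestions = service.generate_comprehensive_suggestions(
- #         product={
- #             'sku': 'TSHIRT-001',
- #             'category': 'T-Shirts',
- #             'title': 'Nice T-Shirt for Men',
- #             'description': 'Good t-shirt',
- #             'attributes': {'color': 'Black'},
- #         },
- #         issues=['Title: Title too short', 'Missing mandatory attribute: Size'],
- #         category_rules=[{'attribute_name': 'Size', 'is_mandatory': True,
- #                          'valid_values': ['S', 'M', 'L', 'XL']}],
- #         scores={'mandatory_fields': 50.0, 'title_quality': 40.0},
- #     )
- #     print(json.dumps(suggestions, indent=2))
- # ---------------------------------------------------------------------------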