2 tháng trước · 16bb49b9eb
--- a/attr_extraction/services.py
+++ b/attr_extraction/services.py
@@ -617,65 +617,196 @@ Do not identify or include any additional attributes. The 'additional' section m
 
				             output_example_additional = '  "additional": {}'
			
 
				 
			
 
				         prompt = f"""
			
 
				-You are a product-attribute classifier and validator.
			
 
				-Understand the product text very deeply. If the same product is available somewhere online, use that knowledge to predict accurate attribute values.
			
 
				-Do not depend only on word-by-word matching from the product text - interpret the meaning and suggest attributes intelligently.
			
 
				-Pick the *closest meaning* value from the allowed list, even if not an exact word match.
			
 
				-I want values for all mandatory attributes.
			
 
				-If a value is not found anywhere, the source should be "inferred".
			
 
				-Note: Source means from where you have concluded the result. Choose one of these value <{source_hint}>
			
 
				-Do not give "Condition" attribute ever.
			
 
				-
			
 
				-
			
 
				-ALLOWED VALUES (MANDATORY):
			
 
				-{allowed_text}
			
 
				-
			
 
				-Note: "Strictly" return multiple values for these attributes: {multiple_text}. These values must be most possible values from the list and should be max 2 values. 
			
 
				-
			
 
				-{user_values_section}
			
 
				+You are an expert product attribute extraction system. Your task is to classify products into predefined categories with 100% accuracy.
			
 
				+
			
 
				+CORE RULES - FOLLOW STRICTLY:
			
 
				+1. You MUST ONLY use values from the ALLOWED VALUES lists provided below
			
 
				+2. NEVER invent, create, or infer values that don't exist in the allowed lists
			
 
				+3. NEVER use synonyms, variations, or similar words - ONLY exact matches from the lists
			
 
				+4. Each value you return MUST be an EXACT COPY from the allowed list (case-sensitive)
			
 
				+5. If you cannot find a perfect match, choose the CLOSEST semantic match from the allowed list
			
 
				+6. When unsure, default to the most generic option from the allowed list
			
 
				+7. CRITICAL GUARDRAIL: NEVER use a value allowed for one attribute (e.g., T-Shirts from t_shirt_type) as the value for a different attribute (e.g., style_clothing_top). The lists are entirely separate.
			
 
				+8. NEVER invent, create, or infer values that don't exist in the allowed lists.
			
 
				+9. Special Instruction for T-Shirts: If the product text describes a generic T-shirt (Crew Neck, V-Neck, etc.) which is a basic shirt worn by pulling it over the head, you MUST use the value Pullover. This is the most appropriate generic style available in this specific list.
			
 
				+
			
 
				+
			
 
				+=====================
			
 
				+ATTRIBUTE DEFINITIONS + ALLOWED VALUES
			
 
				+=====================
			
 
				+
			
 
				+1️⃣ **style_clothing_top** (Mandatory)
			
 
				+Defines the overall silhouette or construction of the top.
			
 
				+
			
 
				+Allowed values + definitions:
			
 
				+- Bandeau: Strapless, tight top covering just bust.
			
 
				+- Blouse: Loose, dressy top, usually with sleeves.
			
 
				+- Camisole: Thin-strapped sleeveless lightweight top.
			
 
				+- Chemise: Straight, loose unshaped top.
			
 
				+- Cocoon: Rounded, oversized body silhouette.
			
 
				+- Corset: Structured top shaping waist with boning.
			
 
				+- Crop: Short top exposing midriff above waist.
			
 
				+- Cutout: Top with intentionally open fabric areas.
			
 
				+- Duster: Long open-front flowy layer.
			
 
				+- Flounce: Top featuring decorative ruffles.
			
 
				+- Full Zip: Zipper opening entire length front.
			
 
				+- Guide: Top with reference markers or functional guides.
			
 
				+- Half Zip: Zipper halfway down front.
			
 
				+- High-Low: Back hem longer than front.
			
 
				+- Hoodie: Top with a hood attached.
			
 
				+- Muscle: Sleeveless with wide armholes, athletic.
			
 
				+- Peasant: Loose boho style with gathered neckline/sleeves.
			
 
				+- Peplum: Fitted top with flared waist ruffle.
			
 
				+- Pullover: Worn by pulling over head; no front opening.
			
 
				+- Quarter Zip: Short zipper from collar ~¼ length down.
			
 
				+- Raglan: Sleeves extend to collar with diagonal seams.
			
 
				+- Ringer: Contrast colored sleeve cuffs + neckline band.
			
 
				+- Rugby: Thick striped or solid collared sport-style top.
			
 
				+- Smocked: Gathered elastic shirring for stretch texture.
			
 
				+- Swing: A-line flare from bust downward.
			
 
				+- Torsette: Corset-like, bust exposed for layering.
			
 
				+- Tube: Strapless elongated top; longer than bandeau.
			
 
				+- Zip-Up: Top with zipper closure (partial/full).
			
 
				+
			
 
				+
			
 
				+2️⃣ **shirt_neck_style** (Mandatory)
			
 
				+Describes the neckline’s shape and construction.
			
 
				+
			
 
				+Allowed values + definitions:
			
 
				+- Boat Neck: Wide neckline shoulder-to-shoulder.
			
 
				+- Caged Neck: Multiple straps forming cage-like design.
			
 
				+- Choker Neck: Tight high neck like choker band.
			
 
				+- Collared: Fold-over collar shirt/polo style.
			
 
				+- Cowl Neck: Draped soft neckline folds.
			
 
				+- Crew Neck: Close-fitting round classic neckline.
			
 
				+- Deep V-Neck: Deep V shape below chest level.
			
 
				+- Drape Neck: Softly draped neckline less than cowl.
			
 
				+- Funnel Neck: Short stand-up collar not folded.
			
 
				+- Halter: Straps around neck leaving shoulders bare.
			
 
				+- Henley: Round neck with button placket.
			
 
				+- High Neck: More neck coverage, no fold.
			
 
				+- Hooded: Neck includes a hood.
			
 
				+- Jewel Neck: High round neck at base of throat.
			
 
				+- Keyhole Neck: Slit/hole opening at neckline.
			
 
				+- Lace Neckline: Lace material used around neckline.
			
 
				+- Mock Neck: Short raised collar, not folded.
			
 
				+- Notch Neck: Small V cut in round neckline.
			
 
				+- Open: General wide/open neckline shape.
			
 
				+- Plunge: Very deep V/U revealing cleavage.
			
 
				+- Roll Neck: Loose rolled turtleneck style.
			
 
				+- Round Neck: Standard circular neckline.
			
 
				+- Round Neckline: Same as Round Neck.
			
 
				+- Scoop Neck: Wide deep U-shaped neckline.
			
 
				+- Scrunch Neck: Gathered/scrunched fabric at neckline.
			
 
				+- Slit Neck: Small vertical slit opening at front.
			
 
				+- Square Neckline: Straight edged square neck.
			
 
				+- Sweetheart: Heart-shaped neckline contour.
			
 
				+- Tie Neck: Ties or bow at neckline.
			
 
				+- Turtleneck: High folded collar covering full neck.
			
 
				+- V-Neck: V-shaped neckline (not deep).
			
 
				+- Wide Neck: Broad neckline toward shoulders.
			
 
				+- Zip Mock Neck: Mock neck with zipper.
			
 
				+
			
 
				+3️⃣ **t_shirt_type** (Mandatory)
			
 
				+Defines the category/design purpose of the T-shirt.
			
 
				+
			
 
				+Allowed values + definitions:
			
 
				+- Babydoll T-Shirt: Feminine fitted shirt with flared hem.
			
 
				+- Classic T-Shirt: Standard basic crew tee.
			
 
				+- Graphic Tees: T-shirts featuring printed graphics.
			
 
				+- Pocket Tee: T-shirt having a chest pocket.
			
 
				+- T-Shirts: General type when no specific style is clear.
			
 
				+- Tank Tops: Sleeveless shirts with shoulder straps.
			
 
				+
			
 
				+=====================
			
 
				+MANDATORY ATTRIBUTE RULES
			
 
				+=====================
			
 
				+
			
 
				+MULTI-VALUE ATTRIBUTES (Return exactly 2 most relevant values ONLY):
			
 
				+{multiple_text}
			
 
				+
			
 
				+SINGLE-VALUE ATTRIBUTES (Return exactly 1 value for all other attributes)
			
 
				+
			
 
				+=====================
			
 
				+INPUT PRODUCT DETAILS
			
 
				+=====================
			
 
				+PRODUCT INFORMATION TO ANALYZE:
			
 
				+{product_text}
			
 
				 
			
 
				 {additional_instructions}
			
 
				 
			
 
				-PRODUCT TEXT:
			
 
				-{product_text}
			
 
				+=====================
			
 
				+EXTRACTION STRATEGY
			
 
				+=====================
			
 
				+Step 1: Read product text carefully
			
 
				+Step 2: Identify strong style/type/neck indicators
			
 
				+Step 3: Choose closest exact match from allowed list
			
 
				+Step 4: VALIDATE chosen values exist in allowed list
			
 
				+
			
 
				+=====================
			
 
				+FORBIDDEN ACTIONS
			
 
				+=====================
			
 
				+:x: NEVER invent values
			
 
				+:x: NEVER modify spelling/capitalization
			
 
				+:x: NEVER use non-allowed synonyms
			
 
				+:x: NEVER return attributes outside allowed lists
			
 
				+:x: NEVER use descriptive words like “short sleeve”, “women top”, “graphic print” unless EXACT in list
			
 
				+
			
 
				+=====================
			
 
				+OUTPUT FORMAT
			
 
				+=====================
			
 
				+Return ONLY this JSON structure — no markdown, no explanation:
			
 
				 
			
 
				-OUTPUT (strict JSON only):
			
 
				 {{
			
 
				   "mandatory": {{
			
 
				-    "<attr>": [{{
			
 
				-      "value": "<chosen_value>", 
			
 
				-      "source": "<{source_hint}>",
			
 
				-      "reason": "Explanation of why this value was chosen. If user provided a value, explain why you agreed/disagreed with it.",
			
 
				-      "original_value": "<user_entered_value_if_provided>",
			
 
				-      "decision": "accepted|rejected|not_provided"
			
 
				+    "style_clothing_top": [{{
			
 
				+      "value": "<exact value from allowed list>",
			
 
				+      "source": "{source_hint}",
			
 
				+      "reason": "Explain specific mapping from product text"
			
 
				+    }}],
			
 
				+    "shirt_neck_style": [{{
			
 
				+      "value": "<exact value from allowed list>",
			
 
				+      "source": "{source_hint}",
			
 
				+      "reason": "Explain specific mapping from product text"
			
 
				+    }}],
			
 
				+    "t_shirt_type": [{{
			
 
				+      "value": "<exact value from allowed list>",
			
 
				+      "source": "{source_hint}",
			
 
				+      "reason": "Explain specific mapping from product text"
			
 
				     }}]
			
 
				   }},
			
 
				 {output_example_additional}
			
 
				 }}
			
 
				 
			
 
				-RULES:
			
 
				-- For each mandatory attribute with a user-entered value, include "original_value" and "decision" fields
			
 
				-- "decision" values: "accepted" (used user's value), "rejected" (used different value), "not_provided" (no user value given)
			
 
				-- "reason" must explain your choice, especially when rejecting user input
			
 
				-- For 'multiple' attributes, always give multiple values for those attributes, choose wisely and max 2 values per attribute that are very close. 
			
 
				-- Source must be one of: {source_hint}
			
 
				-- Be honest and specific in your reasoning.
			
 
				-- Return ONLY valid JSON
			
 
				+=====================
			
 
				+FINAL VALIDATION BEFORE RESPONDING
			
 
				+=====================
			
 
				+✓ All values EXACT from allowed list
			
 
				+✓ Multi-value attributes have exactly 2 values
			
 
				+✓ Single-value attributes have exactly 1 value
			
 
				+✓ “source” must be one of: {source_hint}
			
 
				+✓ Reasons clearly explain mapping
			
 
				+✓ Pure JSON — no markdown wrapper
			
 
				+
			
 
				 """
			
 
				 
			
 
				+
			
 
				+        print(f"Prompt to the llm is: {prompt}")
			
 
				+
			
 
				         payload = {
			
 
				             "model": model,
			
 
				             "messages": [
			
 
				                 {"role": "system", "content": "You are a JSON-only extractor and validator. Always provide clear reasoning for your decisions."},
			
 
				                 {"role": "user", "content": prompt},
			
 
				             ],
			
 
				-            "temperature": 0.3,
			
 
				+            "temperature": 0.0,
			
 
				             "max_tokens": 2000,  # Increased for reasoning
			
 
				         }
			
 
				 
			
 
				         try:
			
 
				             raw = ProductAttributeService._call_llm(payload)
			
 
				             logger.info("Raw LLM response received")
			
 
				+            print(raw)
			
 
				             cleaned = ProductAttributeService._clean_json(raw)
			
 
				             parsed = json.loads(cleaned)
			
 
				         except Exception as exc:
			
--- a/attr_extraction/urls.py
+++ b/attr_extraction/urls.py
@@ -20,7 +20,7 @@ from .views import CacheManagementView, CacheStatsView
 
				 urlpatterns = [
			
 
				     # Existing endpoints
			
 
				     path('extract/', ExtractProductAttributesView.as_view(), name='extract-attributes'),
			
 
				-    path('batch-extract-actual/', BatchExtractProductAttributesView.as_view(), name='batch-extract-attributes'),
			
 
				+    path('batch-extract/', BatchExtractProductAttributesView.as_view(), name='batch-extract-attributes'),
			
 
				     path('products/', ProductListView.as_view(), name='product-list'),
			
 
				     path('products/upload-excel/', ProductUploadExcelView.as_view(), name='product-upload-excel'),
			
 
				     path('products/upload-attributes/', ProductAttributesUploadView.as_view(), name='product-upload-attributes'),
			
@@ -38,7 +38,7 @@ urlpatterns = [
 
				     path('cache/management/', CacheManagementView.as_view(), name='cache-management'),
			
 
				     path('cache/stats/', CacheStatsView.as_view(), name='cache-stats'),
			
 
				 
			
 
				-    path('batch-extract/', ReadLocalJSONView.as_view(), name='ReadLocalJSONView'),
			
 
				+    path('batch-extract-json/', ReadLocalJSONView.as_view(), name='ReadLocalJSONView'),
			
 
				 
			
 
				 ]
			
 
				 
			
--- a/attr_extraction/views.py
+++ b/attr_extraction/views.py
@@ -566,7 +566,8 @@ class BatchExtractProductAttributesView(APIView):
 
				                     extract_additional=extract_additional,
			
 
				                     multiple=multiple,
			
 
				                     use_cache=True,
			
 
				-                    user_entered_values=user_entered_values  # 🆕 PASS USER VALUES
			
 
				+                    user_entered_values=""  # 🆕 PASS NULL
			
 
				+                    # user_entered_values=user_entered_values  # 🆕 PASS USER VALUES
			
 
				                 )
			
 
				 
			
 
				                 # NOTE: Original values are now part of LLM response with reasoning
			
--- a/db.sqlite3
+++ b/db.sqlite3