# views.py
  1. from rest_framework.views import APIView
  2. from rest_framework.response import Response
  3. from rest_framework import status
  4. from rest_framework.parsers import MultiPartParser, FormParser
  5. from django.db import transaction
  6. import pandas as pd
  7. from .models import Product, ProductType, ProductAttribute, AttributePossibleValue
  8. from .serializers import (
  9. SingleProductRequestSerializer,
  10. BatchProductRequestSerializer,
  11. ProductAttributeResultSerializer,
  12. BatchProductResponseSerializer,
  13. ProductSerializer,
  14. ProductTypeSerializer,
  15. ProductAttributeSerializer,
  16. AttributePossibleValueSerializer
  17. )
  18. from .services import ProductAttributeService
  19. from .ocr_service import OCRService
  20. # Sample test images (publicly available)
  21. SAMPLE_IMAGES = {
  22. "tshirt": "https://images.unsplash.com/photo-1521572163474-6864f9cf17ab",
  23. "dress": "https://images.unsplash.com/photo-1595777457583-95e059d581b8",
  24. "jeans": "https://images.unsplash.com/photo-1542272604-787c3835535d"
  25. }
  26. # ==================== Updated views.py ====================
  27. from rest_framework.views import APIView
  28. from rest_framework.response import Response
  29. from rest_framework import status
  30. from .models import Product
  31. from .services import ProductAttributeService
  32. from .ocr_service import OCRService
  33. from .visual_processing_service import VisualProcessingService
  34. class ExtractProductAttributesView(APIView):
  35. """
  36. API endpoint to extract product attributes for a single product by item_id.
  37. Fetches product details from database with source tracking.
  38. Returns attributes in array format: [{"value": "...", "source": "..."}]
  39. Includes OCR and Visual Processing results.
  40. """
  41. def post(self, request):
  42. serializer = SingleProductRequestSerializer(data=request.data)
  43. if not serializer.is_valid():
  44. return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
  45. validated_data = serializer.validated_data
  46. item_id = validated_data.get("item_id")
  47. # Fetch product from DB
  48. try:
  49. product = Product.objects.get(item_id=item_id)
  50. except Product.DoesNotExist:
  51. return Response(
  52. {"error": f"Product with item_id '{item_id}' not found."},
  53. status=status.HTTP_404_NOT_FOUND
  54. )
  55. # Extract product details
  56. title = product.product_name
  57. short_desc = product.product_short_description
  58. long_desc = product.product_long_description
  59. image_url = product.image_path
  60. # Process image for OCR if required
  61. ocr_results = None
  62. ocr_text = None
  63. visual_results = None
  64. if validated_data.get("process_image", True) and image_url:
  65. # OCR Processing
  66. ocr_service = OCRService()
  67. ocr_results = ocr_service.process_image(image_url)
  68. if ocr_results and ocr_results.get("detected_text"):
  69. ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
  70. ocr_results, validated_data.get("model")
  71. )
  72. ocr_results["extracted_attributes"] = ocr_attrs
  73. ocr_text = "\n".join([
  74. f"{item['text']} (confidence: {item['confidence']:.2f})"
  75. for item in ocr_results["detected_text"]
  76. ])
  77. # Visual Processing
  78. visual_service = VisualProcessingService()
  79. product_type_hint = product.product_type if hasattr(product, 'product_type') else None
  80. visual_results = visual_service.process_image(image_url, product_type_hint)
  81. # Combine all product text with source tracking
  82. product_text, source_map = ProductAttributeService.combine_product_text(
  83. title=title,
  84. short_desc=short_desc,
  85. long_desc=long_desc,
  86. ocr_text=ocr_text
  87. )
  88. # Extract attributes with enhanced features and source tracking
  89. result = ProductAttributeService.extract_attributes(
  90. product_text=product_text,
  91. mandatory_attrs=validated_data["mandatory_attrs"],
  92. source_map=source_map,
  93. model=validated_data.get("model"),
  94. extract_additional=validated_data.get("extract_additional", True),
  95. multiple=validated_data.get("multiple", []),
  96. threshold_abs=validated_data.get("threshold_abs", 0.65),
  97. margin=validated_data.get("margin", 0.15),
  98. use_dynamic_thresholds=validated_data.get("use_dynamic_thresholds", True),
  99. use_adaptive_margin=validated_data.get("use_adaptive_margin", True),
  100. use_semantic_clustering=validated_data.get("use_semantic_clustering", True)
  101. )
  102. # Attach OCR results if available
  103. if ocr_results:
  104. result["ocr_results"] = ocr_results
  105. # Attach Visual Processing results if available
  106. if visual_results:
  107. result["visual_results"] = visual_results
  108. response_serializer = ProductAttributeResultSerializer(data=result)
  109. if response_serializer.is_valid():
  110. return Response(response_serializer.data, status=status.HTTP_200_OK)
  111. return Response(result, status=status.HTTP_200_OK)
  112. # Replace the BatchExtractProductAttributesView in your views.py with this updated version
  113. # class BatchExtractProductAttributesView(APIView):
  114. # """
  115. # API endpoint to extract product attributes for multiple products in batch.
  116. # Uses item-specific mandatory_attrs with source tracking.
  117. # Returns attributes in array format with original_value field.
  118. # Includes OCR and Visual Processing results.
  119. # """
  120. # def post(self, request):
  121. # serializer = BatchProductRequestSerializer(data=request.data)
  122. # if not serializer.is_valid():
  123. # return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
  124. # validated_data = serializer.validated_data
  125. # # Get batch-level settings
  126. # product_list = validated_data.get("products", [])
  127. # model = validated_data.get("model")
  128. # extract_additional = validated_data.get("extract_additional", True)
  129. # process_image = validated_data.get("process_image", True)
  130. # multiple = validated_data.get("multiple", [])
  131. # threshold_abs = validated_data.get("threshold_abs", 0.65)
  132. # margin = validated_data.get("margin", 0.15)
  133. # use_dynamic_thresholds = validated_data.get("use_dynamic_thresholds", True)
  134. # use_adaptive_margin = validated_data.get("use_adaptive_margin", True)
  135. # use_semantic_clustering = validated_data.get("use_semantic_clustering", True)
  136. # # Extract all item_ids to query the database efficiently
  137. # item_ids = [p['item_id'] for p in product_list]
  138. # # Fetch all products in one query
  139. # products_queryset = Product.objects.filter(item_id__in=item_ids)
  140. # product_map = {product.item_id: product for product in products_queryset}
  141. # found_ids = set(product_map.keys())
  142. # # Fetch all original attribute values for these products in one query
  143. # original_values_qs = ProductAttributeValue.objects.filter(
  144. # product__item_id__in=item_ids
  145. # ).select_related('product')
  146. # # Create a nested dictionary: {item_id: {attribute_name: original_value}}
  147. # original_values_map = {}
  148. # for attr_val in original_values_qs:
  149. # item_id = attr_val.product.item_id
  150. # if item_id not in original_values_map:
  151. # original_values_map[item_id] = {}
  152. # original_values_map[item_id][attr_val.attribute_name] = attr_val.original_value
  153. # results = []
  154. # successful = 0
  155. # failed = 0
  156. # for product_entry in product_list:
  157. # item_id = product_entry['item_id']
  158. # mandatory_attrs = product_entry['mandatory_attrs']
  159. # if item_id not in found_ids:
  160. # failed += 1
  161. # results.append({
  162. # "product_id": item_id,
  163. # "error": "Product not found in database"
  164. # })
  165. # continue
  166. # product = product_map[item_id]
  167. # try:
  168. # title = product.product_name
  169. # short_desc = product.product_short_description
  170. # long_desc = product.product_long_description
  171. # image_url = product.image_path
  172. # ocr_results = None
  173. # ocr_text = None
  174. # visual_results = None
  175. # # Image Processing Logic
  176. # if process_image and image_url:
  177. # # OCR Processing
  178. # ocr_service = OCRService()
  179. # ocr_results = ocr_service.process_image(image_url)
  180. # if ocr_results and ocr_results.get("detected_text"):
  181. # ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
  182. # ocr_results, model
  183. # )
  184. # ocr_results["extracted_attributes"] = ocr_attrs
  185. # ocr_text = "\n".join([
  186. # f"{item['text']} (confidence: {item['confidence']:.2f})"
  187. # for item in ocr_results["detected_text"]
  188. # ])
  189. # # Visual Processing
  190. # visual_service = VisualProcessingService()
  191. # product_type_hint = product.product_type if hasattr(product, 'product_type') else None
  192. # visual_results = visual_service.process_image(image_url, product_type_hint)
  193. # # Format visual attributes to array format with source tracking
  194. # if visual_results and visual_results.get('visual_attributes'):
  195. # visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
  196. # visual_results['visual_attributes']
  197. # )
  198. # # Combine product text with source tracking
  199. # product_text, source_map = ProductAttributeService.combine_product_text(
  200. # title=title,
  201. # short_desc=short_desc,
  202. # long_desc=long_desc,
  203. # ocr_text=ocr_text
  204. # )
  205. # # Attribute Extraction with source tracking (returns array format)
  206. # extracted = ProductAttributeService.extract_attributes(
  207. # product_text=product_text,
  208. # mandatory_attrs=mandatory_attrs,
  209. # source_map=source_map,
  210. # model=model,
  211. # extract_additional=extract_additional,
  212. # multiple=multiple,
  213. # threshold_abs=threshold_abs,
  214. # margin=margin,
  215. # use_dynamic_thresholds=use_dynamic_thresholds,
  216. # use_adaptive_margin=use_adaptive_margin,
  217. # use_semantic_clustering=use_semantic_clustering
  218. # )
  219. # # Add original_value to each extracted attribute
  220. # original_attrs = original_values_map.get(item_id, {})
  221. # # Process mandatory attributes
  222. # for attr_name, attr_values in extracted.get("mandatory", {}).items():
  223. # if isinstance(attr_values, list):
  224. # for attr_obj in attr_values:
  225. # if isinstance(attr_obj, dict):
  226. # # Add original_value if it exists
  227. # attr_obj["original_value"] = original_attrs.get(attr_name, "")
  228. # # Process additional attributes
  229. # for attr_name, attr_values in extracted.get("additional", {}).items():
  230. # if isinstance(attr_values, list):
  231. # for attr_obj in attr_values:
  232. # if isinstance(attr_obj, dict):
  233. # # Add original_value if it exists
  234. # attr_obj["original_value"] = original_attrs.get(attr_name, "")
  235. # result = {
  236. # "product_id": product.item_id,
  237. # "mandatory": extracted.get("mandatory", {}),
  238. # "additional": extracted.get("additional", {}),
  239. # }
  240. # # Attach OCR results if available
  241. # if ocr_results:
  242. # result["ocr_results"] = ocr_results
  243. # # Attach Visual Processing results if available
  244. # if visual_results:
  245. # result["visual_results"] = visual_results
  246. # results.append(result)
  247. # successful += 1
  248. # except Exception as e:
  249. # failed += 1
  250. # results.append({
  251. # "product_id": item_id,
  252. # "error": str(e)
  253. # })
  254. # batch_result = {
  255. # "results": results,
  256. # "total_products": len(product_list),
  257. # "successful": successful,
  258. # "failed": failed
  259. # }
  260. # response_serializer = BatchProductResponseSerializer(data=batch_result)
  261. # if response_serializer.is_valid():
  262. # return Response(response_serializer.data, status=status.HTTP_200_OK)
  263. # return Response(batch_result, status=status.HTTP_200_OK)
  264. # views.py - OPTIMIZED WITHOUT REDIS/CELERY
  265. # class BatchExtractProductAttributesView(APIView):
  266. # """
  267. # Optimized batch extraction using ThreadPoolExecutor (built-in Python)
  268. # """
  269. # def post(self, request):
  270. # serializer = BatchProductRequestSerializer(data=request.data)
  271. # if not serializer.is_valid():
  272. # return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
  273. # validated_data = serializer.validated_data
  274. # product_list = validated_data.get("products", [])
  275. # # OPTIMIZATION 1: Single optimized database query
  276. # item_ids = [p['item_id'] for p in product_list]
  277. # products_queryset = Product.objects.filter(
  278. # item_id__in=item_ids
  279. # ).prefetch_related('attribute_values') # Single query!
  280. # product_map = {product.item_id: product for product in products_queryset}
  281. # # OPTIMIZATION 2: Prefetch ALL original attribute values in ONE query
  282. # original_values_qs = ProductAttributeValue.objects.filter(
  283. # product__item_id__in=item_ids
  284. # ).select_related('product')
  285. # original_values_map = {}
  286. # for attr_val in original_values_qs:
  287. # item_id = attr_val.product.item_id
  288. # if item_id not in original_values_map:
  289. # original_values_map[item_id] = {}
  290. # original_values_map[item_id][attr_val.attribute_name] = attr_val.original_value
  291. # # Extract settings
  292. # model = validated_data.get("model")
  293. # extract_additional = validated_data.get("extract_additional", True)
  294. # process_image = validated_data.get("process_image", True)
  295. # multiple = validated_data.get("multiple", [])
  296. # threshold_abs = validated_data.get("threshold_abs", 0.65)
  297. # margin = validated_data.get("margin", 0.15)
  298. # use_dynamic_thresholds = validated_data.get("use_dynamic_thresholds", True)
  299. # use_adaptive_margin = validated_data.get("use_adaptive_margin", True)
  300. # use_semantic_clustering = validated_data.get("use_semantic_clustering", True)
  301. # results = []
  302. # successful = 0
  303. # failed = 0
  304. # # OPTIMIZATION 3: Initialize services once
  305. # ocr_service = OCRService() if process_image else None
  306. # visual_service = VisualProcessingService() if process_image else None
  307. # # OPTIMIZATION 4: Process in parallel using ThreadPoolExecutor
  308. # def process_single_product(product_entry):
  309. # """Process a single product (runs in parallel)"""
  310. # item_id = product_entry['item_id']
  311. # mandatory_attrs = product_entry['mandatory_attrs']
  312. # if item_id not in product_map:
  313. # return {
  314. # "product_id": item_id,
  315. # "error": "Product not found in database"
  316. # }, False
  317. # product = product_map[item_id]
  318. # try:
  319. # title = product.product_name
  320. # short_desc = product.product_short_description
  321. # long_desc = product.product_long_description
  322. # image_url = product.image_path
  323. # ocr_results = None
  324. # ocr_text = None
  325. # visual_results = None
  326. # # Image processing (if enabled)
  327. # if process_image and image_url:
  328. # if ocr_service:
  329. # ocr_results = ocr_service.process_image(image_url)
  330. # if ocr_results and ocr_results.get("detected_text"):
  331. # ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
  332. # ocr_results, model
  333. # )
  334. # ocr_results["extracted_attributes"] = ocr_attrs
  335. # ocr_text = "\n".join([
  336. # f"{item['text']} (confidence: {item['confidence']:.2f})"
  337. # for item in ocr_results["detected_text"]
  338. # ])
  339. # if visual_service:
  340. # product_type_hint = product.product_type if hasattr(product, 'product_type') else None
  341. # visual_results = visual_service.process_image(image_url, product_type_hint)
  342. # if visual_results and visual_results.get('visual_attributes'):
  343. # visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
  344. # visual_results['visual_attributes']
  345. # )
  346. # # Combine product text with source tracking
  347. # product_text, source_map = ProductAttributeService.combine_product_text(
  348. # title=title,
  349. # short_desc=short_desc,
  350. # long_desc=long_desc,
  351. # ocr_text=ocr_text
  352. # )
  353. # # Extract attributes (WITH CACHING ENABLED)
  354. # extracted = ProductAttributeService.extract_attributes(
  355. # product_text=product_text,
  356. # mandatory_attrs=mandatory_attrs,
  357. # source_map=source_map,
  358. # model=model,
  359. # extract_additional=extract_additional,
  360. # multiple=multiple,
  361. # threshold_abs=threshold_abs,
  362. # margin=margin,
  363. # use_dynamic_thresholds=use_dynamic_thresholds,
  364. # use_adaptive_margin=use_adaptive_margin,
  365. # use_semantic_clustering=use_semantic_clustering,
  366. # use_cache=True # Enable caching!
  367. # )
  368. # # Add original values
  369. # original_attrs = original_values_map.get(item_id, {})
  370. # for attr_name, attr_values in extracted.get("mandatory", {}).items():
  371. # if isinstance(attr_values, list):
  372. # for attr_obj in attr_values:
  373. # if isinstance(attr_obj, dict):
  374. # attr_obj["original_value"] = original_attrs.get(attr_name, "")
  375. # for attr_name, attr_values in extracted.get("additional", {}).items():
  376. # if isinstance(attr_values, list):
  377. # for attr_obj in attr_values:
  378. # if isinstance(attr_obj, dict):
  379. # attr_obj["original_value"] = original_attrs.get(attr_name, "")
  380. # result = {
  381. # "product_id": product.item_id,
  382. # "mandatory": extracted.get("mandatory", {}),
  383. # "additional": extracted.get("additional", {}),
  384. # }
  385. # if ocr_results:
  386. # result["ocr_results"] = ocr_results
  387. # if visual_results:
  388. # result["visual_results"] = visual_results
  389. # return result, True
  390. # except Exception as e:
  391. # return {
  392. # "product_id": item_id,
  393. # "error": str(e)
  394. # }, False
  395. # # OPTIMIZATION 5: Use ThreadPoolExecutor for parallel processing
  396. # import concurrent.futures
  397. # max_workers = min(10, len(product_list)) # Up to 10 parallel workers
  398. # with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
  399. # # Submit all tasks
  400. # future_to_product = {
  401. # executor.submit(process_single_product, product): product
  402. # for product in product_list
  403. # }
  404. # # Collect results as they complete
  405. # for future in concurrent.futures.as_completed(future_to_product):
  406. # try:
  407. # result, success = future.result()
  408. # results.append(result)
  409. # if success:
  410. # successful += 1
  411. # else:
  412. # failed += 1
  413. # except Exception as e:
  414. # failed += 1
  415. # logger.error(f"Unexpected error: {str(e)}")
  416. # results.append({
  417. # "product_id": "unknown",
  418. # "error": str(e)
  419. # })
  420. # batch_result = {
  421. # "results": results,
  422. # "total_products": len(product_list),
  423. # "successful": successful,
  424. # "failed": failed
  425. # }
  426. # response_serializer = BatchProductResponseSerializer(data=batch_result)
  427. # if response_serializer.is_valid():
  428. # return Response(response_serializer.data, status=status.HTTP_200_OK)
  429. # return Response(batch_result, status=status.HTTP_200_OK)
  430. # ==================== OPTIMIZED BATCH VIEW ====================
  431. import concurrent.futures
  432. class BatchExtractProductAttributesView(APIView):
  433. """
  434. ⚡ PERFORMANCE OPTIMIZED: Batch extraction with intelligent parallelization
  435. Expected performance: 10 products in 30-60 seconds (with image processing)
  436. """
  437. def post(self, request):
  438. import time
  439. start_time = time.time()
  440. serializer = BatchProductRequestSerializer(data=request.data)
  441. if not serializer.is_valid():
  442. return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
  443. validated_data = serializer.validated_data
  444. product_list = validated_data.get("products", [])
  445. logger.info(f"🚀 Starting batch processing for {len(product_list)} products")
  446. # ==================== OPTIMIZATION 1: Bulk DB Query ====================
  447. item_ids = [p['item_id'] for p in product_list]
  448. products_queryset = Product.objects.filter(
  449. item_id__in=item_ids
  450. ).prefetch_related('attribute_values')
  451. product_map = {product.item_id: product for product in products_queryset}
  452. # Prefetch ALL original attribute values in ONE query
  453. original_values_qs = ProductAttributeValue.objects.filter(
  454. product__item_id__in=item_ids
  455. ).select_related('product')
  456. original_values_map = {}
  457. for attr_val in original_values_qs:
  458. item_id = attr_val.product.item_id
  459. if item_id not in original_values_map:
  460. original_values_map[item_id] = {}
  461. original_values_map[item_id][attr_val.attribute_name] = attr_val.original_value
  462. logger.info(f"✓ Loaded {len(product_map)} products from database")
  463. # Extract settings
  464. model = validated_data.get("model")
  465. extract_additional = validated_data.get("extract_additional", True)
  466. process_image = validated_data.get("process_image", True)
  467. multiple = validated_data.get("multiple", [])
  468. threshold_abs = validated_data.get("threshold_abs", 0.65)
  469. margin = validated_data.get("margin", 0.15)
  470. use_dynamic_thresholds = validated_data.get("use_dynamic_thresholds", True)
  471. use_adaptive_margin = validated_data.get("use_adaptive_margin", True)
  472. use_semantic_clustering = validated_data.get("use_semantic_clustering", True)
  473. results = []
  474. successful = 0
  475. failed = 0
  476. # ==================== OPTIMIZATION 2: Conditional Service Init ====================
  477. # Only initialize if processing images
  478. ocr_service = None
  479. visual_service = None
  480. if process_image:
  481. from .ocr_service import OCRService
  482. from .visual_processing_service import VisualProcessingService
  483. ocr_service = OCRService()
  484. visual_service = VisualProcessingService()
  485. logger.info("✓ Image processing services initialized")
  486. # ==================== OPTIMIZATION 3: Smart Parallelization ====================
  487. def process_single_product(product_entry):
  488. """Process a single product (runs in parallel)"""
  489. import time
  490. product_start = time.time()
  491. item_id = product_entry['item_id']
  492. mandatory_attrs = product_entry['mandatory_attrs']
  493. if item_id not in product_map:
  494. return {
  495. "product_id": item_id,
  496. "error": "Product not found in database"
  497. }, False
  498. product = product_map[item_id]
  499. try:
  500. title = product.product_name
  501. short_desc = product.product_short_description
  502. long_desc = product.product_long_description
  503. image_url = product.image_path
  504. ocr_results = None
  505. ocr_text = None
  506. visual_results = None
  507. # ⚡ SKIP IMAGE PROCESSING IF DISABLED (HUGE TIME SAVER)
  508. if process_image and image_url:
  509. if ocr_service:
  510. ocr_results = ocr_service.process_image(image_url)
  511. if ocr_results and ocr_results.get("detected_text"):
  512. ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
  513. ocr_results, model
  514. )
  515. ocr_results["extracted_attributes"] = ocr_attrs
  516. ocr_text = "\n".join([
  517. f"{item['text']} (confidence: {item['confidence']:.2f})"
  518. for item in ocr_results["detected_text"]
  519. ])
  520. if visual_service:
  521. product_type_hint = product.product_type if hasattr(product, 'product_type') else None
  522. visual_results = visual_service.process_image(image_url, product_type_hint)
  523. if visual_results and visual_results.get('visual_attributes'):
  524. visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
  525. visual_results['visual_attributes']
  526. )
  527. # Combine product text with source tracking
  528. product_text, source_map = ProductAttributeService.combine_product_text(
  529. title=title,
  530. short_desc=short_desc,
  531. long_desc=long_desc,
  532. ocr_text=ocr_text
  533. )
  534. # ⚡ EXTRACT ATTRIBUTES WITH CACHING ENABLED
  535. extracted = ProductAttributeService.extract_attributes(
  536. product_text=product_text,
  537. mandatory_attrs=mandatory_attrs,
  538. source_map=source_map,
  539. model=model,
  540. extract_additional=extract_additional,
  541. multiple=multiple,
  542. threshold_abs=threshold_abs,
  543. margin=margin,
  544. use_dynamic_thresholds=use_dynamic_thresholds,
  545. use_adaptive_margin=use_adaptive_margin,
  546. use_semantic_clustering=use_semantic_clustering,
  547. use_cache=True # ⚡ CRITICAL: Enable caching
  548. )
  549. # Add original values
  550. original_attrs = original_values_map.get(item_id, {})
  551. for attr_name, attr_values in extracted.get("mandatory", {}).items():
  552. if isinstance(attr_values, list):
  553. for attr_obj in attr_values:
  554. if isinstance(attr_obj, dict):
  555. attr_obj["original_value"] = original_attrs.get(attr_name, "")
  556. for attr_name, attr_values in extracted.get("additional", {}).items():
  557. if isinstance(attr_values, list):
  558. for attr_obj in attr_values:
  559. if isinstance(attr_obj, dict):
  560. attr_obj["original_value"] = original_attrs.get(attr_name, "")
  561. result = {
  562. "product_id": product.item_id,
  563. "mandatory": extracted.get("mandatory", {}),
  564. "additional": extracted.get("additional", {}),
  565. }
  566. if ocr_results:
  567. result["ocr_results"] = ocr_results
  568. if visual_results:
  569. result["visual_results"] = visual_results
  570. processing_time = time.time() - product_start
  571. logger.info(f"✓ Processed {item_id} in {processing_time:.2f}s")
  572. return result, True
  573. except Exception as e:
  574. logger.error(f"❌ Error processing {item_id}: {str(e)}")
  575. return {
  576. "product_id": item_id,
  577. "error": str(e)
  578. }, False
  579. # ==================== OPTIMIZATION 4: Parallel Execution ====================
  580. # Adjust workers based on whether image processing is enabled
  581. max_workers = min(3 if process_image else 10, len(product_list))
  582. logger.info(f"⚡ Using {max_workers} parallel workers")
  583. with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
  584. # Submit all tasks
  585. future_to_product = {
  586. executor.submit(process_single_product, product): product
  587. for product in product_list
  588. }
  589. # Collect results as they complete
  590. for future in concurrent.futures.as_completed(future_to_product):
  591. try:
  592. result, success = future.result()
  593. results.append(result)
  594. if success:
  595. successful += 1
  596. else:
  597. failed += 1
  598. except Exception as e:
  599. failed += 1
  600. logger.error(f"❌ Future execution error: {str(e)}")
  601. results.append({
  602. "product_id": "unknown",
  603. "error": str(e)
  604. })
  605. total_time = time.time() - start_time
  606. # Get cache statistics
  607. cache_stats = ProductAttributeService.get_cache_stats()
  608. logger.info(f"""
  609. 🎉 BATCH PROCESSING COMPLETE
  610. ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
  611. Total products: {len(product_list)}
  612. Successful: {successful}
  613. Failed: {failed}
  614. Total time: {total_time:.2f}s
  615. Avg time/product: {total_time/len(product_list):.2f}s
  616. Cache hit rate: {cache_stats['embedding_cache']['hit_rate_percent']:.1f}%
  617. ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
  618. """)
  619. batch_result = {
  620. "results": results,
  621. "total_products": len(product_list),
  622. "successful": successful,
  623. "failed": failed,
  624. "performance": {
  625. "total_time_seconds": round(total_time, 2),
  626. "avg_time_per_product": round(total_time / len(product_list), 2),
  627. "workers_used": max_workers
  628. },
  629. "cache_stats": cache_stats
  630. }
  631. response_serializer = BatchProductResponseSerializer(data=batch_result)
  632. if response_serializer.is_valid():
  633. return Response(response_serializer.data, status=status.HTTP_200_OK)
  634. return Response(batch_result, status=status.HTTP_200_OK)
  635. class ProductListView(APIView):
  636. """
  637. GET API to list all products with details
  638. """
  639. def get(self, request):
  640. products = Product.objects.all()
  641. serializer = ProductSerializer(products, many=True)
  642. return Response(serializer.data, status=status.HTTP_200_OK)
  643. from rest_framework.views import APIView
  644. from rest_framework.response import Response
  645. from rest_framework import status
  646. from rest_framework.parsers import MultiPartParser, FormParser
  647. import pandas as pd
  648. from .models import Product
  649. from rest_framework.views import APIView
  650. from rest_framework.response import Response
  651. from rest_framework import status
  652. from rest_framework.parsers import MultiPartParser, FormParser
  653. from django.db import transaction
  654. import pandas as pd
  655. from .models import Product, ProductAttributeValue
  656. import logging
  657. import json
  658. from rest_framework.views import APIView
  659. from rest_framework.response import Response
  660. from rest_framework import status
  661. from rest_framework.parsers import MultiPartParser, FormParser
  662. from django.db import transaction
  663. from django.db.models import Prefetch
  664. import pandas as pd
  665. # Import ALL your models
  666. from .models import Product, ProductAttributeValue, ProductType, ProductAttribute, AttributePossibleValue
  667. from .services import ProductAttributeService
  668. from .ocr_service import OCRService
  669. from .visual_processing_service import VisualProcessingService
  670. from openpyxl import Workbook
  671. from openpyxl.styles import Font, PatternFill, Alignment
  672. from django.conf import settings
  673. import os
  674. import threading
  675. from datetime import datetime
  676. # --- Configuration for Generated Outputs Folder ---
  677. OUTPUT_FOLDER_NAME = 'generated_outputs'
  678. OUTPUT_ROOT = os.path.join(settings.MEDIA_ROOT, OUTPUT_FOLDER_NAME)
  679. OUTPUT_URL = os.path.join(settings.MEDIA_URL, OUTPUT_FOLDER_NAME).replace('\\', '/') # Use forward slashes for URL
  680. # Define log and status file paths within the new subfolder
  681. LOG_FILE_NAME = 'excel_generation.log'
  682. STATUS_FILE_NAME = 'excel_generation_status.json'
  683. EXCEL_FILE_NAME = 'generated_products.xlsx'
  684. LOG_FILE_PATH = os.path.join(OUTPUT_ROOT, LOG_FILE_NAME)
  685. STATUS_FILE_PATH = os.path.join(OUTPUT_ROOT, STATUS_FILE_NAME)
  686. EXCEL_FILE_PATH = os.path.join(OUTPUT_ROOT, EXCEL_FILE_NAME)
  687. # Ensure the OUTPUT_ROOT exists for files to be saved
  688. if not os.path.exists(OUTPUT_ROOT):
  689. os.makedirs(OUTPUT_ROOT)
  690. # Configure basic logging to the new path
  691. logging.basicConfig(
  692. filename=LOG_FILE_PATH,
  693. level=logging.INFO,
  694. format='%(asctime)s - %(levelname)s - %(message)s'
  695. )
  696. logger = logging.getLogger(__name__)
  697. # -------------------------------------------------------------------------------------------------
def generate_product_excel_background() -> None:
    """
    Perform batch attribute extraction for all products and generate an Excel file.

    Runs in a background thread (started by ProductUploadExcelView) to avoid
    blocking the API response. Writes two sheets — "Products" (raw DB rows) and
    "Attribute_values" (original vs. generated values) — to EXCEL_FILE_PATH,
    logs progress to LOG_FILE_PATH, and drops a SUCCESS/FAILED JSON status file
    at STATUS_FILE_PATH for external monitoring. Returns nothing; all outcomes
    are communicated via files and logs.
    """
    logger.info(f"[{datetime.now().isoformat()}] Starting background product Excel generation and attribute extraction.")
    successful = 0
    failed = 0
    results = []  # To store detailed extraction results for Excel sheet 2

    # Function to write status file (SUCCESS/FAILED). Reads the enclosing
    # counters by closure, so it must be called only after they are final.
    def write_status(status_type, error_msg=None):
        status_data = {
            "status": status_type,
            "timestamp": datetime.now().isoformat(),
            "products_processed": successful + failed,
            "products_successful": successful,
            "products_failed": failed,
            # FIX: Use the updated EXCEL_FILE_NAME and OUTPUT_URL
            # NOTE(review): os.path.join on a URL yields backslashes on
            # Windows — confirm these monitoring paths render correctly there.
            "excel_path": os.path.join(OUTPUT_URL, EXCEL_FILE_NAME) if status_type == "SUCCESS" else None,
            "log_path": os.path.join(OUTPUT_URL, LOG_FILE_NAME),
            "error_message": error_msg
        }
        try:
            # FIX: STATUS_FILE_PATH is now inside generated_outputs
            with open(STATUS_FILE_PATH, 'w') as f:
                json.dump(status_data, f, indent=4)
        except Exception as e:
            # Status file is best-effort: log loudly but never propagate.
            logger.exception(f"CRITICAL ERROR: Failed to write status file at {STATUS_FILE_PATH}: {e}")

    try:
        # 1. PREFETCH all necessary related data to minimize database queries
        # Prefetch possible values for mandatory attributes
        possible_values_prefetch = Prefetch(
            'attributes',
            queryset=ProductAttribute.objects.filter(is_mandatory=True).prefetch_related('possible_values')
        )
        # Fetch all ProductTypes with their mandatory attributes and possible values
        all_product_types = ProductType.objects.prefetch_related(possible_values_prefetch)
        product_type_map = {
            pt.name: pt for pt in all_product_types
        }
        # Prepare product_list for batch extraction: one entry per product with
        # its type name and the mandatory-attribute vocabulary for that type.
        all_products = Product.objects.all()
        product_list = []
        for p in all_products:
            mandatory_attrs_dict = {}
            product_type_name = p.product_type.strip() if p.product_type else None
            if product_type_name and product_type_name in product_type_map:
                pt = product_type_map[product_type_name]
                # Build the mandatory_attrs dictionary: { "Attribute Name": ["Value 1", "Value 2"], ... }
                for attr in pt.attributes.all():
                    mandatory_attrs_dict[attr.name] = [
                        pv.value for pv in attr.possible_values.all()
                    ]
            product_list.append({
                "item_id": p.item_id,
                "product_type_name": product_type_name,
                "mandatory_attrs": mandatory_attrs_dict
            })
        # Batch settings (using defaults). Image processing is off here, so the
        # OCR/visual branch below is effectively dead in this background run.
        model = "llama-3.1-8b-instant"
        extract_additional = True
        process_image = False
        multiple = []
        threshold_abs = 0.65
        margin = 0.15
        use_dynamic_thresholds = True
        use_adaptive_margin = True
        use_semantic_clustering = True
        # Batch extraction logic: refetch products by id so lookups are O(1).
        item_ids = [p['item_id'] for p in product_list]
        products_queryset = Product.objects.filter(item_id__in=item_ids)
        product_map = {product.item_id: product for product in products_queryset}
        found_ids = set(product_map.keys())
        for product_entry in product_list:
            item_id = product_entry['item_id']
            mandatory_attrs = product_entry['mandatory_attrs']
            if item_id not in found_ids:
                failed += 1
                results.append({
                    "product_id": item_id,
                    "error": "Product not found in database"
                })
                logger.warning(f"Product {item_id} not found in database. Skipping extraction.")
                continue
            product = product_map[item_id]
            try:
                title = product.product_name
                short_desc = product.product_short_description
                long_desc = product.product_long_description
                image_url = product.image_path
                ocr_results = None
                ocr_text = None
                visual_results = None
                if process_image and image_url:
                    logger.info(f"Processing image for product {item_id}...")
                    # OCR Processing
                    ocr_service = OCRService()
                    ocr_results = ocr_service.process_image(image_url)
                    if ocr_results and ocr_results.get("detected_text"):
                        ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
                            ocr_results, model
                        )
                        ocr_results["extracted_attributes"] = ocr_attrs
                        ocr_text = "\n".join([
                            f"{item['text']} (confidence: {item['confidence']:.2f})"
                            for item in ocr_results["detected_text"]
                        ])
                    # Visual Processing
                    visual_service = VisualProcessingService()
                    product_type_hint = product.product_type if product.product_type else None
                    visual_results = visual_service.process_image(image_url, product_type_hint)
                    if visual_results and visual_results.get('visual_attributes'):
                        visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
                            visual_results['visual_attributes']
                        )
                    logger.info(f"Image processing done for product {item_id}.")
                # Combine product text with source tracking
                product_text, source_map = ProductAttributeService.combine_product_text(
                    title=title,
                    short_desc=short_desc,
                    long_desc=long_desc,
                    ocr_text=ocr_text
                )
                # Attribute Extraction with source tracking
                extracted = ProductAttributeService.extract_attributes(
                    product_text=product_text,
                    mandatory_attrs=mandatory_attrs,
                    source_map=source_map,
                    model=model,
                    extract_additional=extract_additional,
                    multiple=multiple,
                    threshold_abs=threshold_abs,
                    margin=margin,
                    use_dynamic_thresholds=use_dynamic_thresholds,
                    use_adaptive_margin=use_adaptive_margin,
                    use_semantic_clustering=use_semantic_clustering
                )
                result = {
                    "product_id": item_id,
                    "mandatory": extracted.get("mandatory", {}),
                    "additional": extracted.get("additional", {}),
                }
                if ocr_results:
                    result["ocr_results"] = ocr_results
                if visual_results:
                    result["visual_results"] = visual_results
                results.append(result)
                successful += 1
                logger.info(f"Attribute extraction successful for product {item_id}.")
            except Exception as e:
                # Per-product failures are recorded and the batch continues.
                failed += 1
                results.append({
                    "product_id": item_id,
                    "error": str(e)
                })
                logger.exception(f"Error during attribute extraction for product {item_id}.")
        logger.info(f"Batch extraction phase complete. Successful: {successful}, Failed: {failed}")
        # --------------------------------------------------------------------------------
        # Generate and save the Excel file
        # --------------------------------------------------------------------------------
        wb = Workbook()
        # Sheet 1: Products (from DB) (Logic is the same, skipped for brevity)
        ws_products = wb.active
        ws_products.title = "Products"
        products_headers = ['ITEM ID', 'PRODUCT NAME', 'PRODUCT TYPE', 'Product Short Description', 'Product Long Description', 'image_path']
        header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
        header_font = Font(bold=True, color="FFFFFF")
        for col_num, header in enumerate(products_headers, 1):
            cell = ws_products.cell(row=1, column=col_num)
            cell.value = header
            cell.fill = header_fill
            cell.font = header_font
            cell.alignment = Alignment(horizontal="center", vertical="center")
        all_products_db = Product.objects.all()
        for row_num, product in enumerate(all_products_db, 2):
            ws_products.cell(row=row_num, column=1, value=product.item_id)
            ws_products.cell(row=row_num, column=2, value=product.product_name)
            ws_products.cell(row=row_num, column=3, value=product.product_type)
            ws_products.cell(row=row_num, column=4, value=product.product_short_description)
            ws_products.cell(row=row_num, column=5, value=product.product_long_description)
            ws_products.cell(row=row_num, column=6, value=product.image_path)
        for col_dim, width in zip(['A', 'B', 'C', 'D', 'E', 'F'], [15, 25, 15, 35, 50, 45]):
            ws_products.column_dimensions[col_dim].width = width
        # Sheet 2: Attribute_values (Logic is the same, skipped for brevity)
        ws_attributes = wb.create_sheet("Attribute_values")
        attributes_headers = ['item_id', 'attribute_name', 'original_value', 'generated_value']
        for col_num, header in enumerate(attributes_headers, 1):
            cell = ws_attributes.cell(row=1, column=col_num)
            cell.value = header
            cell.fill = header_fill
            cell.font = header_font
            cell.alignment = Alignment(horizontal="center", vertical="center")
        row_num = 2  # running output row, shared by all the writer loops below
        # NOTE(review): attr.product.item_id issues one query per row unless
        # the queryset uses select_related('product') — potential N+1; verify.
        all_original_attrs = ProductAttributeValue.objects.all()
        original_attrs_lookup = {
            (attr.product.item_id, attr.attribute_name): attr.original_value
            for attr in all_original_attrs
        }
        processed_original_keys = set()  # (item_id, attr_name) pairs already written
        for res in results:
            # ... (Excel writing logic for generated/original attributes remains unchanged)
            item_id = res["product_id"]
            if "error" in res:
                # Failed product: emit its original values with the error text
                # in the generated_value column.
                for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
                    if orig_item_id == item_id:
                        ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
                        ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
                        ws_attributes.cell(row=row_num, column=3, value=orig_value)
                        ws_attributes.cell(row=row_num, column=4, value=f"Extraction Failed: {res['error']}")
                        processed_original_keys.add((orig_item_id, orig_attr_name))
                        row_num += 1
                continue
            # Aggregate all generated values per (item_id, attribute) key from
            # the three possible sources: text extraction, OCR, and visual.
            generated_attrs = {}
            for cat in ["mandatory", "additional"]:
                attrs = res.get(cat, {})
                for attr_name, values in attrs.items():
                    for val in values:
                        key = (item_id, attr_name)
                        if key not in generated_attrs:
                            generated_attrs[key] = []
                        generated_attrs[key].append(f"{val['value']} (source: {val['source']})")
            ocr = res.get("ocr_results")
            if ocr and "extracted_attributes" in ocr and isinstance(ocr["extracted_attributes"], dict):
                for attr_name, values in ocr["extracted_attributes"].items():
                    for val in values:
                        key = (item_id, attr_name)
                        if key not in generated_attrs:
                            generated_attrs[key] = []
                        generated_attrs[key].append(f"{val['value']} (source: {val['source']})")
            visual = res.get("visual_results")
            if visual and "visual_attributes" in visual:
                vis_attrs = visual["visual_attributes"]
                # Visual attributes may arrive as a dict or a list of dicts;
                # normalize scalar dict values into single-entry lists.
                if isinstance(vis_attrs, dict):
                    for attr_name, values in vis_attrs.items():
                        if not isinstance(values, list):
                            values = [{"value": values, "source": "visual"}]
                        for val in values:
                            key = (item_id, attr_name)
                            if key not in generated_attrs:
                                generated_attrs[key] = []
                            generated_attrs[key].append(f"{val['value']} (source: {val.get('source', 'visual')})")
                elif isinstance(vis_attrs, list):
                    for item in vis_attrs:
                        attr_name = item.get("attribute_name") or item.get("name")
                        if not attr_name: continue
                        value = item.get("value", "")
                        source = item.get("source", "visual")
                        key = (item_id, attr_name)
                        if key not in generated_attrs:
                            generated_attrs[key] = []
                        generated_attrs[key].append(f"{value} (source: {source})")
            # Write one row per generated attribute, joined with "; ".
            for (attr_item_id, attr_name), gen_values in generated_attrs.items():
                original_value = original_attrs_lookup.get((attr_item_id, attr_name), "")
                generated_value = "; ".join(gen_values) if gen_values else ""
                ws_attributes.cell(row=row_num, column=1, value=attr_item_id)
                ws_attributes.cell(row=row_num, column=2, value=attr_name)
                ws_attributes.cell(row=row_num, column=3, value=original_value)
                ws_attributes.cell(row=row_num, column=4, value=generated_value)
                processed_original_keys.add((attr_item_id, attr_name))
                row_num += 1
            # Original-only attributes for this product (no generated value).
            for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
                if orig_item_id == item_id and (orig_item_id, orig_attr_name) not in processed_original_keys:
                    ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
                    ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
                    ws_attributes.cell(row=row_num, column=3, value=orig_value)
                    ws_attributes.cell(row=row_num, column=4, value="")
                    processed_original_keys.add((orig_item_id, orig_attr_name))
                    row_num += 1
        # Final sweep: original values for products never seen in `results`.
        for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
            if (orig_item_id, orig_attr_name) not in processed_original_keys:
                ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
                ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
                ws_attributes.cell(row=row_num, column=3, value=orig_value)
                ws_attributes.cell(row=row_num, column=4, value="Original value only (Product not processed in batch)")
                row_num += 1
        for col_dim, width in zip(['A', 'B', 'C', 'D'], [15, 35, 50, 50]):
            ws_attributes.column_dimensions[col_dim].width = width
        # FIX: Save to the new EXCEL_FILE_PATH
        wb.save(EXCEL_FILE_PATH)
        logger.info(f"Excel file successfully saved to {EXCEL_FILE_PATH}")
        # Write SUCCESS status
        write_status("SUCCESS")
        logger.info("Background task finished successfully.")
    except Exception as e:
        # Log the critical error and write FAILED status
        logger.exception("CRITICAL ERROR during background Excel generation process.")
        write_status("FAILED", error_msg=str(e))
  986. # -------------------------------------------------------------------------------------------------
class ProductUploadExcelView(APIView):
    """
    POST API to upload an Excel file.

    Expects a multipart upload under the form key 'file' containing a
    "Products" sheet (required) and an optional "Attribute_values" sheet.
    Rows are upserted inside a single transaction; on a fully clean upload a
    background thread is started to regenerate the products Excel export.
    Returns 201 on full success, 207 on partial failure, 400/500 on errors.
    """
    parser_classes = (MultiPartParser, FormParser)

    def post(self, request, *args, **kwargs):
        file_obj = request.FILES.get('file')
        if not file_obj:
            return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)
        try:
            # ... (Upload and DB processing logic remains unchanged)
            # Read all sheets from Excel file
            excel_file = pd.ExcelFile(file_obj)
            # Check if required sheets exist
            if 'Products' not in excel_file.sheet_names:
                logger.error(f"Upload failed: Missing 'Products' sheet in file.")
                return Response({
                    'error': "Missing 'Products' sheet",
                    'available_sheets': excel_file.sheet_names
                }, status=status.HTTP_400_BAD_REQUEST)
            df_products = pd.read_excel(excel_file, sheet_name='Products')
            # Normalize headers: strip, lowercase, spaces -> underscores.
            df_products.columns = [c.strip().lower().replace(' ', '_') for c in df_products.columns]
            expected_product_cols = {
                'item_id', 'product_name', 'product_long_description',
                'product_short_description', 'product_type', 'image_path'
            }
            if not expected_product_cols.issubset(df_products.columns):
                logger.error(f"Upload failed: Missing required columns in Products sheet.")
                return Response({
                    'error': 'Missing required columns in Products sheet',
                    'required_columns': list(expected_product_cols),
                    'found_columns': list(df_products.columns)
                }, status=status.HTTP_400_BAD_REQUEST)
            # The Attribute_values sheet is optional; validate it only if present.
            df_attributes = None
            has_attributes_sheet = 'Attribute_values' in excel_file.sheet_names
            if has_attributes_sheet:
                df_attributes = pd.read_excel(excel_file, sheet_name='Attribute_values')
                df_attributes.columns = [c.strip().lower().replace(' ', '_') for c in df_attributes.columns]
                expected_attr_cols = {'item_id', 'attribute_name', 'original_value'}
                if not expected_attr_cols.issubset(df_attributes.columns):
                    logger.error(f"Upload failed: Missing required columns in Attribute_values sheet.")
                    return Response({
                        'error': 'Missing required columns in Attribute_values sheet',
                        'required_columns': list(expected_attr_cols),
                        'found_columns': list(df_attributes.columns)
                    }, status=status.HTTP_400_BAD_REQUEST)
            products_created = 0
            products_updated = 0
            attributes_created = 0
            attributes_updated = 0
            products_failed = 0
            attributes_failed = 0
            errors = []
            # All DB writes happen in one transaction; per-row exceptions are
            # caught so one bad row doesn't roll back the entire upload.
            with transaction.atomic():
                for idx, row in df_products.iterrows():
                    # NOTE(review): str() on a missing pandas cell yields the
                    # literal string 'nan' (float NaN), not '' — confirm empty
                    # cells are handled as intended downstream.
                    item_id = str(row.get('item_id', '')).strip()
                    product_type = str(row.get('product_type', '')).strip()
                    if not item_id:
                        # Row numbers are reported 1-based including the header (+2).
                        products_failed += 1
                        errors.append(f"Products Row {idx + 2}: Missing item_id")
                        continue
                    try:
                        if product_type:
                            ProductType.objects.get_or_create(name=product_type)
                        defaults = {
                            'product_name': str(row.get('product_name', '')),
                            'product_long_description': str(row.get('product_long_description', '')),
                            'product_short_description': str(row.get('product_short_description', '')),
                            'product_type': product_type,
                            'image_path': str(row.get('image_path', '')),
                        }
                        obj, created = Product.objects.update_or_create(item_id=item_id, defaults=defaults)
                        if created: products_created += 1
                        else: products_updated += 1
                    except Exception as e:
                        products_failed += 1
                        errors.append(f"Products Row {idx + 2} (item_id: {item_id}): {str(e)}")
                        logger.error(f"Error processing product {item_id} in Products sheet: {e}")
                if has_attributes_sheet and df_attributes is not None:
                    # Bulk-fetch referenced products once so per-row lookup is O(1).
                    item_ids_in_attrs = df_attributes['item_id'].astype(str).unique()
                    existing_products = {p.item_id: p for p in Product.objects.filter(item_id__in=item_ids_in_attrs)}
                    for idx, row in df_attributes.iterrows():
                        item_id = str(row.get('item_id', '')).strip()
                        attribute_name = str(row.get('attribute_name', '')).strip()
                        original_value = str(row.get('original_value', '')).strip()
                        if not item_id or not attribute_name:
                            attributes_failed += 1
                            errors.append(f"Attribute_values Row {idx + 2}: Missing item_id or attribute_name")
                            continue
                        product = existing_products.get(item_id)
                        if not product:
                            attributes_failed += 1
                            errors.append(f"Attribute_values Row {idx + 2}: Product with item_id '{item_id}' not found. Make sure it exists in Products sheet.")
                            continue
                        try:
                            attr_obj, created = ProductAttributeValue.objects.update_or_create(
                                product=product,
                                attribute_name=attribute_name,
                                defaults={'original_value': original_value}
                            )
                            if created: attributes_created += 1
                            else: attributes_updated += 1
                        except Exception as e:
                            attributes_failed += 1
                            errors.append(f"Attribute_values Row {idx + 2} (item_id: {item_id}, attribute: {attribute_name}): {str(e)}")
                            logger.error(f"Error processing attribute {attribute_name} for product {item_id}: {e}")
            # Prepare response data
            response_data = {
                'message': 'Upload completed',
                'products': {
                    'created': products_created, 'updated': products_updated, 'failed': products_failed,
                    'total_processed': products_created + products_updated + products_failed
                },
                'attribute_values': {
                    'created': attributes_created, 'updated': attributes_updated, 'failed': attributes_failed,
                    'total_processed': attributes_created + attributes_updated + attributes_failed
                } if has_attributes_sheet else {'message': 'Attribute_values sheet not found in Excel file'},
                'generated_excel_status': 'Excel generation started in the background.'
            }
            if errors:
                # Cap the error list at 50 entries to keep the response small.
                response_data['errors'] = errors[:50]
                if len(errors) > 50:
                    response_data['errors'].append(f"... and {len(errors) - 50} more errors")
            upload_status = status.HTTP_201_CREATED if products_failed == 0 and attributes_failed == 0 else status.HTTP_207_MULTI_STATUS
            # Start background thread for Excel generation if upload was successful
            if products_failed == 0 and attributes_failed == 0:
                logger.info("API call successful. Triggering background Excel generation thread.")
                threading.Thread(target=generate_product_excel_background, daemon=True).start()
                # FIX: Update monitoring URLs to point to the new generated_outputs subfolder
                response_data['generated_excel_status'] = 'Background Excel generation triggered successfully.'
                response_data['monitoring'] = {
                    'excel_file': os.path.join(OUTPUT_URL, EXCEL_FILE_NAME),
                    'status_file': os.path.join(OUTPUT_URL, STATUS_FILE_NAME),
                    'log_file': os.path.join(OUTPUT_URL, LOG_FILE_NAME),
                    'note': 'These files will be available once the background process completes.'
                }
            else:
                logger.warning(f"API call finished with errors ({products_failed} products, {attributes_failed} attributes). Not triggering background excel generation.")
                response_data['generated_excel_status'] = 'Background Excel generation was NOT triggered due to upload errors. Fix upload errors and re-upload.'
            return Response(response_data, status=upload_status)
        except pd.errors.EmptyDataError:
            logger.error('The uploaded Excel file is empty or invalid.')
            return Response({'error': 'The uploaded Excel file is empty or invalid'}, status=status.HTTP_400_BAD_REQUEST)
        except Exception as e:
            logger.exception(f'An unexpected error occurred while processing the file.')
            return Response({'error': f'An unexpected error occurred while processing the file: {str(e)}'}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
  1133. # Add this view to your views.py for downloading a template
  1134. from django.http import HttpResponse
  1135. from openpyxl import Workbook
  1136. from openpyxl.styles import Font, PatternFill, Alignment
  1137. from rest_framework.views import APIView
  1138. import io
  1139. class DownloadExcelTemplateView(APIView):
  1140. """
  1141. GET API to download an Excel template with two sheets:
  1142. 1. Products sheet with sample data
  1143. 2. Attribute_values sheet with sample data
  1144. """
  1145. def get(self, request):
  1146. # Create a new workbook
  1147. wb = Workbook()
  1148. # Remove default sheet
  1149. if 'Sheet' in wb.sheetnames:
  1150. wb.remove(wb['Sheet'])
  1151. # ===== Create Products Sheet =====
  1152. ws_products = wb.create_sheet("Products", 0)
  1153. # Define headers for Products
  1154. products_headers = [
  1155. 'ITEM ID',
  1156. 'PRODUCT NAME',
  1157. 'PRODUCT TYPE',
  1158. 'Product Short Description',
  1159. 'Product Long Description',
  1160. 'image_path'
  1161. ]
  1162. # Style for headers
  1163. header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
  1164. header_font = Font(bold=True, color="FFFFFF")
  1165. # Add headers to Products sheet
  1166. for col_num, header in enumerate(products_headers, 1):
  1167. cell = ws_products.cell(row=1, column=col_num)
  1168. cell.value = header
  1169. cell.fill = header_fill
  1170. cell.font = header_font
  1171. cell.alignment = Alignment(horizontal="center", vertical="center")
  1172. # Add sample data to Products sheet
  1173. sample_products = [
  1174. [
  1175. '3217373735',
  1176. 'Blue V-Neck T-Shirt',
  1177. 'Clothing',
  1178. 'Stylish blue t-shirt with v-neck design',
  1179. 'Premium quality cotton t-shirt featuring a classic v-neck design. Perfect for casual wear. Available in vibrant blue color.',
  1180. 'https://images.unsplash.com/photo-1521572163474-6864f9cf17ab'
  1181. ],
  1182. [
  1183. '1234567890',
  1184. 'Red Cotton Dress',
  1185. 'Clothing',
  1186. 'Beautiful red dress for special occasions',
  1187. 'Elegant red dress made from 100% cotton fabric. Features a flowing design perfect for summer events and parties.',
  1188. 'https://images.unsplash.com/photo-1595777457583-95e059d581b8'
  1189. ],
  1190. [
  1191. '9876543210',
  1192. 'Steel Screws Pack',
  1193. 'Hardware',
  1194. 'Pack of zinc plated steel screws',
  1195. 'Professional grade steel screws with zinc plating for corrosion resistance. Pack contains 50 pieces, 2 inch length, M6 thread size.',
  1196. 'https://images.unsplash.com/photo-1542272604-787c3835535d'
  1197. ]
  1198. ]
  1199. for row_num, row_data in enumerate(sample_products, 2):
  1200. for col_num, value in enumerate(row_data, 1):
  1201. ws_products.cell(row=row_num, column=col_num, value=value)
  1202. # Adjust column widths for Products sheet
  1203. ws_products.column_dimensions['A'].width = 15 # ITEM ID
  1204. ws_products.column_dimensions['B'].width = 25 # PRODUCT NAME
  1205. ws_products.column_dimensions['C'].width = 15 # PRODUCT TYPE
  1206. ws_products.column_dimensions['D'].width = 35 # Short Description
  1207. ws_products.column_dimensions['E'].width = 50 # Long Description
  1208. ws_products.column_dimensions['F'].width = 45 # image_path
  1209. # ===== Create Attribute_values Sheet =====
  1210. ws_attributes = wb.create_sheet("Attribute_values", 1)
  1211. # Define headers for Attribute_values
  1212. attributes_headers = ['item_id', 'attribute_name', 'original_value']
  1213. # Add headers to Attribute_values sheet
  1214. for col_num, header in enumerate(attributes_headers, 1):
  1215. cell = ws_attributes.cell(row=1, column=col_num)
  1216. cell.value = header
  1217. cell.fill = header_fill
  1218. cell.font = header_font
  1219. cell.alignment = Alignment(horizontal="center", vertical="center")
  1220. # Add sample data to Attribute_values sheet
  1221. sample_attributes = [
  1222. ['3217373735', 'Clothing Neck Style', 'V-Neck Square'],
  1223. ['3217373735', 'Condition', 'New with tags'],
  1224. ['3217373735', 'Material', '100% Cotton'],
  1225. ['3217373735', 'Color', 'Sky Blue'],
  1226. ['3217373735', 'Size', 'Medium'],
  1227. ['1234567890', 'Sleeve Length', 'Sleeveless'],
  1228. ['1234567890', 'Condition', 'Brand New'],
  1229. ['1234567890', 'Pattern', 'Solid'],
  1230. ['1234567890', 'Material', 'Cotton Blend'],
  1231. ['1234567890', 'Color', 'Crimson Red'],
  1232. ['9876543210', 'Material', 'Stainless Steel'],
  1233. ['9876543210', 'Thread Size', 'M6'],
  1234. ['9876543210', 'Length', '2 inches'],
  1235. ['9876543210', 'Coating', 'Zinc Plated'],
  1236. ['9876543210', 'Package Quantity', '50 pieces'],
  1237. ]
  1238. for row_num, row_data in enumerate(sample_attributes, 2):
  1239. for col_num, value in enumerate(row_data, 1):
  1240. ws_attributes.cell(row=row_num, column=col_num, value=value)
  1241. # Adjust column widths for Attribute_values sheet
  1242. ws_attributes.column_dimensions['A'].width = 15 # item_id
  1243. ws_attributes.column_dimensions['B'].width = 25 # attribute_name
  1244. ws_attributes.column_dimensions['C'].width = 30 # original_value
  1245. # Add instructions sheet
  1246. ws_instructions = wb.create_sheet("Instructions", 2)
  1247. instructions_text = [
  1248. ['Excel Upload Instructions', ''],
  1249. ['', ''],
  1250. ['Sheet 1: Products', ''],
  1251. ['- Contains product basic information', ''],
  1252. ['- All columns are required', ''],
  1253. ['- ITEM ID must be unique', ''],
  1254. ['', ''],
  1255. ['Sheet 2: Attribute_values', ''],
  1256. ['- Contains original/manual attribute values', ''],
  1257. ['- item_id must match an ITEM ID from Products sheet', ''],
  1258. ['- Multiple rows can have the same item_id (for different attributes)', ''],
  1259. ['- Each attribute per product should be on a separate row', ''],
  1260. ['', ''],
  1261. ['Upload Process:', ''],
  1262. ['1. Fill in your product data in the Products sheet', ''],
  1263. ['2. Fill in attribute values in the Attribute_values sheet', ''],
  1264. ['3. Ensure item_id values match between both sheets', ''],
  1265. ['4. Save the file and upload via API', ''],
  1266. ['', ''],
  1267. ['Notes:', ''],
  1268. ['- Do not change sheet names (must be "Products" and "Attribute_values")', ''],
  1269. ['- Do not change column header names', ''],
  1270. ['- You can delete the sample data rows', ''],
  1271. ['- You can delete this Instructions sheet before uploading', ''],
  1272. ]
  1273. for row_num, row_data in enumerate(instructions_text, 1):
  1274. ws_instructions.cell(row=row_num, column=1, value=row_data[0])
  1275. if row_num == 1:
  1276. cell = ws_instructions.cell(row=row_num, column=1)
  1277. cell.font = Font(bold=True, size=14)
  1278. ws_instructions.column_dimensions['A'].width = 60
  1279. # Save to BytesIO
  1280. output = io.BytesIO()
  1281. wb.save(output)
  1282. output.seek(0)
  1283. # Create response
  1284. response = HttpResponse(
  1285. output.getvalue(),
  1286. content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
  1287. )
  1288. response['Content-Disposition'] = 'attachment; filename=product_upload_template.xlsx'
  1289. return response
  1290. class DownloadProductsWithAttributesExcelView(APIView):
  1291. """
  1292. GET API to download existing products with their attribute values as Excel.
  1293. Useful for users to update existing data.
  1294. """
  1295. def get(self, request):
  1296. from .models import Product, ProductAttributeValue
  1297. # Create workbook
  1298. wb = Workbook()
  1299. if 'Sheet' in wb.sheetnames:
  1300. wb.remove(wb['Sheet'])
  1301. # ===== Products Sheet =====
  1302. ws_products = wb.create_sheet("Products", 0)
  1303. # Headers
  1304. products_headers = [
  1305. 'ITEM ID', 'PRODUCT NAME', 'PRODUCT TYPE',
  1306. 'Product Short Description', 'Product Long Description', 'image_path'
  1307. ]
  1308. header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
  1309. header_font = Font(bold=True, color="FFFFFF")
  1310. for col_num, header in enumerate(products_headers, 1):
  1311. cell = ws_products.cell(row=1, column=col_num)
  1312. cell.value = header
  1313. cell.fill = header_fill
  1314. cell.font = header_font
  1315. cell.alignment = Alignment(horizontal="center", vertical="center")
  1316. # Fetch and add product data
  1317. products = Product.objects.all()
  1318. for row_num, product in enumerate(products, 2):
  1319. ws_products.cell(row=row_num, column=1, value=product.item_id)
  1320. ws_products.cell(row=row_num, column=2, value=product.product_name)
  1321. ws_products.cell(row=row_num, column=3, value=product.product_type)
  1322. ws_products.cell(row=row_num, column=4, value=product.product_short_description)
  1323. ws_products.cell(row=row_num, column=5, value=product.product_long_description)
  1324. ws_products.cell(row=row_num, column=6, value=product.image_path)
  1325. # Adjust widths
  1326. ws_products.column_dimensions['A'].width = 15
  1327. ws_products.column_dimensions['B'].width = 25
  1328. ws_products.column_dimensions['C'].width = 15
  1329. ws_products.column_dimensions['D'].width = 35
  1330. ws_products.column_dimensions['E'].width = 50
  1331. ws_products.column_dimensions['F'].width = 45
  1332. # ===== Attribute_values Sheet =====
  1333. ws_attributes = wb.create_sheet("Attribute_values", 1)
  1334. attributes_headers = ['item_id', 'attribute_name', 'original_value']
  1335. for col_num, header in enumerate(attributes_headers, 1):
  1336. cell = ws_attributes.cell(row=1, column=col_num)
  1337. cell.value = header
  1338. cell.fill = header_fill
  1339. cell.font = header_font
  1340. cell.alignment = Alignment(horizontal="center", vertical="center")
  1341. # Fetch and add attribute values
  1342. attributes = ProductAttributeValue.objects.select_related('product').all()
  1343. for row_num, attr in enumerate(attributes, 2):
  1344. ws_attributes.cell(row=row_num, column=1, value=attr.product.item_id)
  1345. ws_attributes.cell(row=row_num, column=2, value=attr.attribute_name)
  1346. ws_attributes.cell(row=row_num, column=3, value=attr.original_value)
  1347. ws_attributes.column_dimensions['A'].width = 15
  1348. ws_attributes.column_dimensions['B'].width = 25
  1349. ws_attributes.column_dimensions['C'].width = 30
  1350. # Save to BytesIO
  1351. output = io.BytesIO()
  1352. wb.save(output)
  1353. output.seek(0)
  1354. response = HttpResponse(
  1355. output.getvalue(),
  1356. content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
  1357. )
  1358. response['Content-Disposition'] = 'attachment; filename=products_export.xlsx'
  1359. return response
  1360. class ProductAttributesUploadView(APIView):
  1361. """
  1362. POST API to upload an Excel file and add mandatory/additional attributes
  1363. for product types with possible values.
  1364. """
  1365. parser_classes = (MultiPartParser, FormParser)
  1366. def post(self, request):
  1367. file_obj = request.FILES.get('file')
  1368. if not file_obj:
  1369. return Response({"error": "No file provided."}, status=status.HTTP_400_BAD_REQUEST)
  1370. try:
  1371. df = pd.read_excel(file_obj)
  1372. required_columns = {'product_type', 'attribute_name', 'is_mandatory', 'possible_values'}
  1373. if not required_columns.issubset(df.columns):
  1374. return Response({
  1375. "error": f"Missing required columns. Found: {list(df.columns)}"
  1376. }, status=status.HTTP_400_BAD_REQUEST)
  1377. for _, row in df.iterrows():
  1378. product_type_name = str(row['product_type']).strip()
  1379. attr_name = str(row['attribute_name']).strip()
  1380. is_mandatory = str(row['is_mandatory']).strip().lower() in ['yes', 'true', '1']
  1381. possible_values = str(row.get('possible_values', '')).strip()
  1382. # Get or create product type
  1383. product_type, _ = ProductType.objects.get_or_create(name=product_type_name)
  1384. # Get or create attribute
  1385. attribute, _ = ProductAttribute.objects.get_or_create(
  1386. product_type=product_type,
  1387. name=attr_name,
  1388. defaults={'is_mandatory': is_mandatory}
  1389. )
  1390. attribute.is_mandatory = is_mandatory
  1391. attribute.save()
  1392. # Handle possible values
  1393. AttributePossibleValue.objects.filter(attribute=attribute).delete()
  1394. if possible_values:
  1395. for val in [v.strip() for v in possible_values.split(',') if v.strip()]:
  1396. AttributePossibleValue.objects.create(attribute=attribute, value=val)
  1397. return Response({"message": "Attributes uploaded successfully."}, status=status.HTTP_201_CREATED)
  1398. except Exception as e:
  1399. return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
  1400. class ProductTypeAttributesView(APIView):
  1401. """
  1402. API to view, create, update, and delete product type attributes and their possible values.
  1403. Also supports dynamic product type creation.
  1404. """
  1405. def get(self, request):
  1406. """
  1407. Retrieve all product types with their attributes and possible values.
  1408. """
  1409. product_types = ProductType.objects.all()
  1410. serializer = ProductTypeSerializer(product_types, many=True)
  1411. # Transform the serialized data into the requested format
  1412. result = []
  1413. for pt in serializer.data:
  1414. for attr in pt['attributes']:
  1415. result.append({
  1416. 'product_type': pt['name'],
  1417. 'attribute_name': attr['name'],
  1418. 'is_mandatory': 'Yes' if attr['is_mandatory'] else 'No',
  1419. 'possible_values': ', '.join([pv['value'] for pv in attr['possible_values']])
  1420. })
  1421. return Response(result, status=status.HTTP_200_OK)
  1422. def post(self, request):
  1423. """
  1424. Create a new product type or attribute with possible values.
  1425. Expected payload example:
  1426. {
  1427. "product_type": "Hardware Screws",
  1428. "attribute_name": "Material",
  1429. "is_mandatory": "Yes",
  1430. "possible_values": "Steel, Zinc Plated, Stainless Steel"
  1431. }
  1432. """
  1433. try:
  1434. product_type_name = request.data.get('product_type')
  1435. attribute_name = request.data.get('attribute_name', '')
  1436. is_mandatory = request.data.get('is_mandatory', '').lower() in ['yes', 'true', '1']
  1437. possible_values = request.data.get('possible_values', '')
  1438. if not product_type_name:
  1439. return Response({
  1440. "error": "product_type is required"
  1441. }, status=status.HTTP_400_BAD_REQUEST)
  1442. with transaction.atomic():
  1443. # Get or create product type
  1444. product_type, created = ProductType.objects.get_or_create(name=product_type_name)
  1445. if created and not attribute_name:
  1446. return Response({
  1447. "message": f"Product type '{product_type_name}' created successfully",
  1448. "data": {"product_type": product_type_name}
  1449. }, status=status.HTTP_201_CREATED)
  1450. if attribute_name:
  1451. # Create attribute
  1452. attribute, attr_created = ProductAttribute.objects.get_or_create(
  1453. product_type=product_type,
  1454. name=attribute_name,
  1455. defaults={'is_mandatory': is_mandatory}
  1456. )
  1457. if not attr_created:
  1458. return Response({
  1459. "error": f"Attribute '{attribute_name}' already exists for product type '{product_type_name}'"
  1460. }, status=status.HTTP_400_BAD_REQUEST)
  1461. # Handle possible values
  1462. if possible_values:
  1463. for val in [v.strip() for v in possible_values.split(',') if v.strip()]:
  1464. AttributePossibleValue.objects.create(attribute=attribute, value=val)
  1465. return Response({
  1466. "message": "Attribute created successfully",
  1467. "data": {
  1468. "product_type": product_type_name,
  1469. "attribute_name": attribute_name,
  1470. "is_mandatory": "Yes" if is_mandatory else "No",
  1471. "possible_values": possible_values
  1472. }
  1473. }, status=status.HTTP_201_CREATED)
  1474. return Response({
  1475. "message": f"Product type '{product_type_name}' already exists",
  1476. "data": {"product_type": product_type_name}
  1477. }, status=status.HTTP_200_OK)
  1478. except Exception as e:
  1479. return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
  1480. def put(self, request):
  1481. """
  1482. Update an existing product type attribute and its possible values.
  1483. Expected payload example:
  1484. {
  1485. "product_type": "Hardware Screws",
  1486. "attribute_name": "Material",
  1487. "is_mandatory": "Yes",
  1488. "possible_values": "Steel, Zinc Plated, Stainless Steel, Brass"
  1489. }
  1490. """
  1491. try:
  1492. product_type_name = request.data.get('product_type')
  1493. attribute_name = request.data.get('attribute_name')
  1494. is_mandatory = request.data.get('is_mandatory', '').lower() in ['yes', 'true', '1']
  1495. possible_values = request.data.get('possible_values', '')
  1496. if not all([product_type_name, attribute_name]):
  1497. return Response({
  1498. "error": "product_type and attribute_name are required"
  1499. }, status=status.HTTP_400_BAD_REQUEST)
  1500. with transaction.atomic():
  1501. try:
  1502. product_type = ProductType.objects.get(name=product_type_name)
  1503. attribute = ProductAttribute.objects.get(
  1504. product_type=product_type,
  1505. name=attribute_name
  1506. )
  1507. except ProductType.DoesNotExist:
  1508. return Response({
  1509. "error": f"Product type '{product_type_name}' not found"
  1510. }, status=status.HTTP_404_NOT_FOUND)
  1511. except ProductAttribute.DoesNotExist:
  1512. return Response({
  1513. "error": f"Attribute '{attribute_name}' not found for product type '{product_type_name}'"
  1514. }, status=status.HTTP_404_NOT_FOUND)
  1515. # Update attribute
  1516. attribute.is_mandatory = is_mandatory
  1517. attribute.save()
  1518. # Update possible values
  1519. AttributePossibleValue.objects.filter(attribute=attribute).delete()
  1520. if possible_values:
  1521. for val in [v.strip() for v in possible_values.split(',') if v.strip()]:
  1522. AttributePossibleValue.objects.create(attribute=attribute, value=val)
  1523. return Response({
  1524. "message": "Attribute updated successfully",
  1525. "data": {
  1526. "product_type": product_type_name,
  1527. "attribute_name": attribute_name,
  1528. "is_mandatory": "Yes" if is_mandatory else "No",
  1529. "possible_values": possible_values
  1530. }
  1531. }, status=status.HTTP_200_OK)
  1532. except Exception as e:
  1533. return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
  1534. def delete(self, request):
  1535. """
  1536. Delete a product type or a specific attribute.
  1537. Expected payload example:
  1538. {
  1539. "product_type": "Hardware Screws",
  1540. "attribute_name": "Material"
  1541. }
  1542. """
  1543. try:
  1544. product_type_name = request.data.get('product_type')
  1545. attribute_name = request.data.get('attribute_name', '')
  1546. if not product_type_name:
  1547. return Response({
  1548. "error": "product_type is required"
  1549. }, status=status.HTTP_400_BAD_REQUEST)
  1550. with transaction.atomic():
  1551. try:
  1552. product_type = ProductType.objects.get(name=product_type_name)
  1553. except ProductType.DoesNotExist:
  1554. return Response({
  1555. "error": f"Product type '{product_type_name}' not found"
  1556. }, status=status.HTTP_404_NOT_FOUND)
  1557. if attribute_name:
  1558. # Delete specific attribute
  1559. try:
  1560. attribute = ProductAttribute.objects.get(
  1561. product_type=product_type,
  1562. name=attribute_name
  1563. )
  1564. attribute.delete()
  1565. return Response({
  1566. "message": f"Attribute '{attribute_name}' deleted successfully from product type '{product_type_name}'"
  1567. }, status=status.HTTP_200_OK)
  1568. except ProductAttribute.DoesNotExist:
  1569. return Response({
  1570. "error": f"Attribute '{attribute_name}' not found for product type '{product_type_name}'"
  1571. }, status=status.HTTP_404_NOT_FOUND)
  1572. else:
  1573. # Delete entire product type
  1574. product_type.delete()
  1575. return Response({
  1576. "message": f"Product type '{product_type_name}' and all its attributes deleted successfully"
  1577. }, status=status.HTTP_200_OK)
  1578. except Exception as e:
  1579. return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
  1580. class ProductTypeListView(APIView):
  1581. """
  1582. GET API to list all product types (only names).
  1583. """
  1584. def get(self, request):
  1585. product_types = ProductType.objects.values_list('name', flat=True)
  1586. return Response({"product_types": list(product_types)}, status=status.HTTP_200_OK)
# ----- Manually-entered original attribute value APIs -----
  1588. from rest_framework.views import APIView
  1589. from rest_framework.response import Response
  1590. from rest_framework import status
  1591. from rest_framework.parsers import MultiPartParser, FormParser
  1592. from django.db import transaction
  1593. import pandas as pd
  1594. from .models import Product, ProductAttributeValue
  1595. from .serializers import (
  1596. ProductAttributeValueSerializer,
  1597. ProductAttributeValueInputSerializer,
  1598. BulkProductAttributeValueSerializer,
  1599. ProductWithAttributesSerializer
  1600. )
  1601. class ProductAttributeValueView(APIView):
  1602. """
  1603. API to manage manually entered original attribute values.
  1604. GET: Retrieve all attribute values for a product
  1605. POST: Create or update attribute values for a product
  1606. DELETE: Delete attribute values
  1607. """
  1608. def get(self, request):
  1609. """
  1610. Get original attribute values for a specific product or all products.
  1611. Query params: item_id (optional)
  1612. """
  1613. item_id = request.query_params.get('item_id')
  1614. if item_id:
  1615. try:
  1616. product = Product.objects.get(item_id=item_id)
  1617. values = ProductAttributeValue.objects.filter(product=product)
  1618. serializer = ProductAttributeValueSerializer(values, many=True)
  1619. return Response({
  1620. "item_id": item_id,
  1621. "attributes": serializer.data
  1622. }, status=status.HTTP_200_OK)
  1623. except Product.DoesNotExist:
  1624. return Response({
  1625. "error": f"Product with item_id '{item_id}' not found"
  1626. }, status=status.HTTP_404_NOT_FOUND)
  1627. else:
  1628. # Return all attribute values grouped by product
  1629. values = ProductAttributeValue.objects.all().select_related('product')
  1630. serializer = ProductAttributeValueSerializer(values, many=True)
  1631. return Response(serializer.data, status=status.HTTP_200_OK)
  1632. def post(self, request):
  1633. """
  1634. Create or update original attribute value for a product.
  1635. Expected payload:
  1636. {
  1637. "item_id": "3217373735",
  1638. "attribute_name": "Clothing Neck Style",
  1639. "original_value": "V-Neck Square"
  1640. }
  1641. """
  1642. serializer = ProductAttributeValueInputSerializer(data=request.data)
  1643. if not serializer.is_valid():
  1644. return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
  1645. validated_data = serializer.validated_data
  1646. item_id = validated_data['item_id']
  1647. attribute_name = validated_data['attribute_name']
  1648. original_value = validated_data['original_value']
  1649. try:
  1650. product = Product.objects.get(item_id=item_id)
  1651. except Product.DoesNotExist:
  1652. return Response({
  1653. "error": f"Product with item_id '{item_id}' not found"
  1654. }, status=status.HTTP_404_NOT_FOUND)
  1655. # Create or update the attribute value
  1656. attr_value, created = ProductAttributeValue.objects.update_or_create(
  1657. product=product,
  1658. attribute_name=attribute_name,
  1659. defaults={'original_value': original_value}
  1660. )
  1661. response_serializer = ProductAttributeValueSerializer(attr_value)
  1662. return Response({
  1663. "message": "Attribute value created" if created else "Attribute value updated",
  1664. "data": response_serializer.data
  1665. }, status=status.HTTP_201_CREATED if created else status.HTTP_200_OK)
  1666. def delete(self, request):
  1667. """
  1668. Delete original attribute value(s).
  1669. Expected payload:
  1670. {
  1671. "item_id": "3217373735",
  1672. "attribute_name": "Clothing Neck Style" # Optional, if not provided deletes all for product
  1673. }
  1674. """
  1675. item_id = request.data.get('item_id')
  1676. attribute_name = request.data.get('attribute_name')
  1677. if not item_id:
  1678. return Response({
  1679. "error": "item_id is required"
  1680. }, status=status.HTTP_400_BAD_REQUEST)
  1681. try:
  1682. product = Product.objects.get(item_id=item_id)
  1683. except Product.DoesNotExist:
  1684. return Response({
  1685. "error": f"Product with item_id '{item_id}' not found"
  1686. }, status=status.HTTP_404_NOT_FOUND)
  1687. if attribute_name:
  1688. # Delete specific attribute
  1689. deleted_count, _ = ProductAttributeValue.objects.filter(
  1690. product=product,
  1691. attribute_name=attribute_name
  1692. ).delete()
  1693. if deleted_count == 0:
  1694. return Response({
  1695. "error": f"Attribute '{attribute_name}' not found for product '{item_id}'"
  1696. }, status=status.HTTP_404_NOT_FOUND)
  1697. return Response({
  1698. "message": f"Attribute '{attribute_name}' deleted successfully"
  1699. }, status=status.HTTP_200_OK)
  1700. else:
  1701. # Delete all attributes for product
  1702. deleted_count, _ = ProductAttributeValue.objects.filter(product=product).delete()
  1703. return Response({
  1704. "message": f"Deleted {deleted_count} attribute(s) for product '{item_id}'"
  1705. }, status=status.HTTP_200_OK)
  1706. class BulkProductAttributeValueView(APIView):
  1707. """
  1708. API for bulk operations on original attribute values.
  1709. POST: Create/update multiple attribute values at once
  1710. """
  1711. def post(self, request):
  1712. """
  1713. Bulk create or update attribute values for multiple products.
  1714. Expected payload:
  1715. {
  1716. "products": [
  1717. {
  1718. "item_id": "3217373735",
  1719. "attributes": {
  1720. "Clothing Neck Style": "V-Neck Square",
  1721. "Condition": "New with tags"
  1722. }
  1723. },
  1724. {
  1725. "item_id": "1234567890",
  1726. "attributes": {
  1727. "Material": "Cotton",
  1728. "Size": "L"
  1729. }
  1730. }
  1731. ]
  1732. }
  1733. """
  1734. products_data = request.data.get('products', [])
  1735. if not products_data:
  1736. return Response({
  1737. "error": "products list is required"
  1738. }, status=status.HTTP_400_BAD_REQUEST)
  1739. results = []
  1740. successful = 0
  1741. failed = 0
  1742. with transaction.atomic():
  1743. for product_data in products_data:
  1744. serializer = BulkProductAttributeValueSerializer(data=product_data)
  1745. if not serializer.is_valid():
  1746. failed += 1
  1747. results.append({
  1748. "item_id": product_data.get('item_id'),
  1749. "status": "failed",
  1750. "error": serializer.errors
  1751. })
  1752. continue
  1753. validated_data = serializer.validated_data
  1754. item_id = validated_data['item_id']
  1755. attributes = validated_data['attributes']
  1756. try:
  1757. product = Product.objects.get(item_id=item_id)
  1758. created_count = 0
  1759. updated_count = 0
  1760. for attr_name, original_value in attributes.items():
  1761. _, created = ProductAttributeValue.objects.update_or_create(
  1762. product=product,
  1763. attribute_name=attr_name,
  1764. defaults={'original_value': original_value}
  1765. )
  1766. if created:
  1767. created_count += 1
  1768. else:
  1769. updated_count += 1
  1770. successful += 1
  1771. results.append({
  1772. "item_id": item_id,
  1773. "status": "success",
  1774. "created": created_count,
  1775. "updated": updated_count
  1776. })
  1777. except Product.DoesNotExist:
  1778. failed += 1
  1779. results.append({
  1780. "item_id": item_id,
  1781. "status": "failed",
  1782. "error": f"Product not found"
  1783. })
  1784. return Response({
  1785. "results": results,
  1786. "total_products": len(products_data),
  1787. "successful": successful,
  1788. "failed": failed
  1789. }, status=status.HTTP_200_OK)
  1790. class ProductListWithAttributesView(APIView):
  1791. """
  1792. GET API to list all products with their original attribute values.
  1793. """
  1794. def get(self, request):
  1795. item_id = request.query_params.get('item_id')
  1796. if item_id:
  1797. try:
  1798. product = Product.objects.get(item_id=item_id)
  1799. serializer = ProductWithAttributesSerializer(product)
  1800. return Response(serializer.data, status=status.HTTP_200_OK)
  1801. except Product.DoesNotExist:
  1802. return Response({
  1803. "error": f"Product with item_id '{item_id}' not found"
  1804. }, status=status.HTTP_404_NOT_FOUND)
  1805. else:
  1806. products = Product.objects.all()
  1807. serializer = ProductWithAttributesSerializer(products, many=True)
  1808. return Response(serializer.data, status=status.HTTP_200_OK)