# views.py

from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status
from rest_framework.parsers import MultiPartParser, FormParser
from django.db import transaction
import pandas as pd
from .models import (
    Product,
    ProductType,
    ProductAttribute,
    AttributePossibleValue,
    ProductAttributeValue,
)
from .serializers import (
    SingleProductRequestSerializer,
    BatchProductRequestSerializer,
    ProductAttributeResultSerializer,
    BatchProductResponseSerializer,
    ProductSerializer,
    ProductTypeSerializer,
    ProductAttributeSerializer,
    AttributePossibleValueSerializer
)
from .services import ProductAttributeService
from .ocr_service import OCRService
from .visual_processing_service import VisualProcessingService

# Sample test images (publicly available)
SAMPLE_IMAGES = {
    "tshirt": "https://images.unsplash.com/photo-1521572163474-6864f9cf17ab",
    "dress": "https://images.unsplash.com/photo-1595777457583-95e059d581b8",
    "jeans": "https://images.unsplash.com/photo-1542272604-787c3835535d"
}

class ExtractProductAttributesView(APIView):
    """
    API endpoint to extract product attributes for a single product by item_id.
    Fetches product details from the database with source tracking.
    Returns attributes in array format: [{"value": "...", "source": "..."}]
    Includes OCR and Visual Processing results.
    """
    def post(self, request):
        serializer = SingleProductRequestSerializer(data=request.data)
        if not serializer.is_valid():
            return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
        validated_data = serializer.validated_data
        item_id = validated_data.get("item_id")

        # Fetch product from DB
        try:
            product = Product.objects.get(item_id=item_id)
        except Product.DoesNotExist:
            return Response(
                {"error": f"Product with item_id '{item_id}' not found."},
                status=status.HTTP_404_NOT_FOUND
            )

        # Extract product details
        title = product.product_name
        short_desc = product.product_short_description
        long_desc = product.product_long_description
        image_url = product.image_path

        # Process image for OCR if required
        ocr_results = None
        ocr_text = None
        visual_results = None
        if validated_data.get("process_image", True) and image_url:
            # OCR Processing
            ocr_service = OCRService()
            ocr_results = ocr_service.process_image(image_url)
            if ocr_results and ocr_results.get("detected_text"):
                ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
                    ocr_results, validated_data.get("model")
                )
                ocr_results["extracted_attributes"] = ocr_attrs
                ocr_text = "\n".join([
                    f"{item['text']} (confidence: {item['confidence']:.2f})"
                    for item in ocr_results["detected_text"]
                ])
            # Visual Processing
            visual_service = VisualProcessingService()
            product_type_hint = product.product_type if hasattr(product, 'product_type') else None
            visual_results = visual_service.process_image(image_url, product_type_hint)

        # Combine all product text with source tracking
        product_text, source_map = ProductAttributeService.combine_product_text(
            title=title,
            short_desc=short_desc,
            long_desc=long_desc,
            ocr_text=ocr_text
        )

        # Extract attributes with enhanced features and source tracking
        result = ProductAttributeService.extract_attributes(
            product_text=product_text,
            mandatory_attrs=validated_data["mandatory_attrs"],
            source_map=source_map,
            model=validated_data.get("model"),
            extract_additional=validated_data.get("extract_additional", True),
            multiple=validated_data.get("multiple", []),
            threshold_abs=validated_data.get("threshold_abs", 0.65),
            margin=validated_data.get("margin", 0.15),
            use_dynamic_thresholds=validated_data.get("use_dynamic_thresholds", True),
            use_adaptive_margin=validated_data.get("use_adaptive_margin", True),
            use_semantic_clustering=validated_data.get("use_semantic_clustering", True)
        )

        # Attach OCR results if available
        if ocr_results:
            result["ocr_results"] = ocr_results
        # Attach Visual Processing results if available
        if visual_results:
            result["visual_results"] = visual_results

        response_serializer = ProductAttributeResultSerializer(data=result)
        if response_serializer.is_valid():
            return Response(response_serializer.data, status=status.HTTP_200_OK)
        return Response(result, status=status.HTTP_200_OK)
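
# Example request for the endpoint above (an illustrative sketch: the URL path
# and the concrete attribute names/values are assumptions, since urls.py and
# the serializer schemas are not part of this file; the field names are the
# ones this view reads from validated_data):
#
#   POST /api/extract-attributes/
#   {
#       "item_id": "SKU-1001",
#       "mandatory_attrs": {"Color": ["Red", "Blue", "Black"], "Size": ["S", "M", "L"]},
#       "multiple": ["Color"],
#       "process_image": true,
#       "extract_additional": true
#   }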

class BatchExtractProductAttributesView(APIView):
    """
    API endpoint to extract product attributes for multiple products in batch.
    Uses item-specific mandatory_attrs with source tracking.
    Returns attributes in array format with an original_value field.
    Includes OCR and Visual Processing results.
    """
    def post(self, request):
        serializer = BatchProductRequestSerializer(data=request.data)
        if not serializer.is_valid():
            return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)
        validated_data = serializer.validated_data

        # Get batch-level settings
        product_list = validated_data.get("products", [])
        model = validated_data.get("model")
        extract_additional = validated_data.get("extract_additional", True)
        process_image = validated_data.get("process_image", True)
        multiple = validated_data.get("multiple", [])
        threshold_abs = validated_data.get("threshold_abs", 0.65)
        margin = validated_data.get("margin", 0.15)
        use_dynamic_thresholds = validated_data.get("use_dynamic_thresholds", True)
        use_adaptive_margin = validated_data.get("use_adaptive_margin", True)
        use_semantic_clustering = validated_data.get("use_semantic_clustering", True)

        # Extract all item_ids to query the database efficiently
        item_ids = [p['item_id'] for p in product_list]
        # Fetch all products in one query
        products_queryset = Product.objects.filter(item_id__in=item_ids)
        product_map = {product.item_id: product for product in products_queryset}
        found_ids = set(product_map.keys())

        # Fetch all original attribute values for these products in one query
        original_values_qs = ProductAttributeValue.objects.filter(
            product__item_id__in=item_ids
        ).select_related('product')
        # Create a nested dictionary: {item_id: {attribute_name: original_value}}
        original_values_map = {}
        for attr_val in original_values_qs:
            item_id = attr_val.product.item_id
            if item_id not in original_values_map:
                original_values_map[item_id] = {}
            original_values_map[item_id][attr_val.attribute_name] = attr_val.original_value

        results = []
        successful = 0
        failed = 0
        for product_entry in product_list:
            item_id = product_entry['item_id']
            mandatory_attrs = product_entry['mandatory_attrs']
            if item_id not in found_ids:
                failed += 1
                results.append({
                    "product_id": item_id,
                    "error": "Product not found in database"
                })
                continue
            product = product_map[item_id]
            try:
                title = product.product_name
                short_desc = product.product_short_description
                long_desc = product.product_long_description
                image_url = product.image_path
                ocr_results = None
                ocr_text = None
                visual_results = None

                # Image Processing Logic
                if process_image and image_url:
                    # OCR Processing
                    ocr_service = OCRService()
                    ocr_results = ocr_service.process_image(image_url)
                    if ocr_results and ocr_results.get("detected_text"):
                        ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
                            ocr_results, model
                        )
                        ocr_results["extracted_attributes"] = ocr_attrs
                        ocr_text = "\n".join([
                            f"{item['text']} (confidence: {item['confidence']:.2f})"
                            for item in ocr_results["detected_text"]
                        ])
                    # Visual Processing
                    visual_service = VisualProcessingService()
                    product_type_hint = product.product_type if hasattr(product, 'product_type') else None
                    visual_results = visual_service.process_image(image_url, product_type_hint)
                    # Format visual attributes to array format with source tracking
                    if visual_results and visual_results.get('visual_attributes'):
                        visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
                            visual_results['visual_attributes']
                        )

                # Combine product text with source tracking
                product_text, source_map = ProductAttributeService.combine_product_text(
                    title=title,
                    short_desc=short_desc,
                    long_desc=long_desc,
                    ocr_text=ocr_text
                )

                # Attribute Extraction with source tracking (returns array format)
                extracted = ProductAttributeService.extract_attributes(
                    product_text=product_text,
                    mandatory_attrs=mandatory_attrs,
                    source_map=source_map,
                    model=model,
                    extract_additional=extract_additional,
                    multiple=multiple,
                    threshold_abs=threshold_abs,
                    margin=margin,
                    use_dynamic_thresholds=use_dynamic_thresholds,
                    use_adaptive_margin=use_adaptive_margin,
                    use_semantic_clustering=use_semantic_clustering
                )

                # Add original_value to each extracted attribute
                original_attrs = original_values_map.get(item_id, {})
                # Process mandatory attributes
                for attr_name, attr_values in extracted.get("mandatory", {}).items():
                    if isinstance(attr_values, list):
                        for attr_obj in attr_values:
                            if isinstance(attr_obj, dict):
                                # Add original_value if it exists
                                attr_obj["original_value"] = original_attrs.get(attr_name, "")
                # Process additional attributes
                for attr_name, attr_values in extracted.get("additional", {}).items():
                    if isinstance(attr_values, list):
                        for attr_obj in attr_values:
                            if isinstance(attr_obj, dict):
                                # Add original_value if it exists
                                attr_obj["original_value"] = original_attrs.get(attr_name, "")

                result = {
                    "product_id": product.item_id,
                    "mandatory": extracted.get("mandatory", {}),
                    "additional": extracted.get("additional", {}),
                }
                # Attach OCR results if available
                if ocr_results:
                    result["ocr_results"] = ocr_results
                # Attach Visual Processing results if available
                if visual_results:
                    result["visual_results"] = visual_results
                results.append(result)
                successful += 1
            except Exception as e:
                failed += 1
                results.append({
                    "product_id": item_id,
                    "error": str(e)
                })

        batch_result = {
            "results": results,
            "total_products": len(product_list),
            "successful": successful,
            "failed": failed
        }
        response_serializer = BatchProductResponseSerializer(data=batch_result)
        if response_serializer.is_valid():
            return Response(response_serializer.data, status=status.HTTP_200_OK)
        return Response(batch_result, status=status.HTTP_200_OK)
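
# Example request for the batch endpoint above (an illustrative sketch: the
# URL path and the concrete ids/attributes are assumptions; the field names
# match what this view reads from validated_data):
#
#   POST /api/extract-attributes/batch/
#   {
#       "products": [
#           {"item_id": "SKU-1001", "mandatory_attrs": {"Color": ["Red", "Blue"]}},
#           {"item_id": "SKU-1002", "mandatory_attrs": {"Material": ["Cotton", "Denim"]}}
#       ],
#       "multiple": ["Color"],
#       "threshold_abs": 0.65,
#       "margin": 0.15
#   }
#
# Each extracted attribute object in the response gains an "original_value"
# taken from ProductAttributeValue, or "" when no original value is stored.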

class ProductListView(APIView):
    """
    GET API to list all products with details.
    """
    def get(self, request):
        products = Product.objects.all()
        serializer = ProductSerializer(products, many=True)
        return Response(serializer.data, status=status.HTTP_200_OK)

# class ProductUploadExcelView(APIView):
#     """
#     POST API to upload an Excel file with two sheets:
#     1. 'Products' sheet - Product details
#     2. 'Attribute_values' sheet - Original attribute values
#     Creates/updates both products and their attribute values in a single transaction.
#     """
#     parser_classes = (MultiPartParser, FormParser)
#
#     def post(self, request, *args, **kwargs):
#         file_obj = request.FILES.get('file')
#         if not file_obj:
#             return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)
#         try:
#             # Read all sheets from Excel file
#             excel_file = pd.ExcelFile(file_obj)
#             # Check if required sheets exist
#             if 'Products' not in excel_file.sheet_names:
#                 return Response({
#                     'error': "Missing 'Products' sheet",
#                     'available_sheets': excel_file.sheet_names
#                 }, status=status.HTTP_400_BAD_REQUEST)
#             # Read Products sheet
#             df_products = pd.read_excel(excel_file, sheet_name='Products')
#             df_products.columns = [c.strip().lower().replace(' ', '_') for c in df_products.columns]
#             # Check required columns for Products
#             expected_product_cols = {
#                 'item_id',
#                 'product_name',
#                 'product_long_description',
#                 'product_short_description',
#                 'product_type',
#                 'image_path'
#             }
#             if not expected_product_cols.issubset(df_products.columns):
#                 return Response({
#                     'error': 'Missing required columns in Products sheet',
#                     'required_columns': list(expected_product_cols),
#                     'found_columns': list(df_products.columns)
#                 }, status=status.HTTP_400_BAD_REQUEST)
#             # Read Attribute_values sheet if it exists
#             df_attributes = None
#             has_attributes_sheet = 'Attribute_values' in excel_file.sheet_names
#             if has_attributes_sheet:
#                 df_attributes = pd.read_excel(excel_file, sheet_name='Attribute_values')
#                 df_attributes.columns = [c.strip().lower().replace(' ', '_') for c in df_attributes.columns]
#                 # Check required columns for Attribute_values
#                 expected_attr_cols = {'item_id', 'attribute_name', 'original_value'}
#                 if not expected_attr_cols.issubset(df_attributes.columns):
#                     return Response({
#                         'error': 'Missing required columns in Attribute_values sheet',
#                         'required_columns': list(expected_attr_cols),
#                         'found_columns': list(df_attributes.columns)
#                     }, status=status.HTTP_400_BAD_REQUEST)
#             # Initialize counters
#             products_created = 0
#             products_updated = 0
#             attributes_created = 0
#             attributes_updated = 0
#             products_failed = 0
#             attributes_failed = 0
#             errors = []
#             # Use transaction to ensure atomicity
#             with transaction.atomic():
#                 # Process Products sheet
#                 for idx, row in df_products.iterrows():
#                     item_id = str(row.get('item_id', '')).strip()
#                     if not item_id:
#                         products_failed += 1
#                         errors.append(f"Products Row {idx + 2}: Missing item_id")
#                         continue
#                     try:
#                         defaults = {
#                             'product_name': str(row.get('product_name', '')),
#                             'product_long_description': str(row.get('product_long_description', '')),
#                             'product_short_description': str(row.get('product_short_description', '')),
#                             'product_type': str(row.get('product_type', '')),
#                             'image_path': str(row.get('image_path', '')),
#                         }
#                         obj, created = Product.objects.update_or_create(
#                             item_id=item_id,
#                             defaults=defaults
#                         )
#                         if created:
#                             products_created += 1
#                         else:
#                             products_updated += 1
#                     except Exception as e:
#                         products_failed += 1
#                         errors.append(f"Products Row {idx + 2} (item_id: {item_id}): {str(e)}")
#                 # Process Attribute_values sheet if it exists
#                 if has_attributes_sheet and df_attributes is not None:
#                     # Group by item_id to optimize lookups
#                     item_ids_in_attrs = df_attributes['item_id'].unique()
#                     # Fetch all products at once
#                     existing_products = {
#                         p.item_id: p
#                         for p in Product.objects.filter(item_id__in=item_ids_in_attrs)
#                     }
#                     for idx, row in df_attributes.iterrows():
#                         item_id = str(row.get('item_id', '')).strip()
#                         attribute_name = str(row.get('attribute_name', '')).strip()
#                         original_value = str(row.get('original_value', '')).strip()
#                         if not item_id or not attribute_name:
#                             attributes_failed += 1
#                             errors.append(
#                                 f"Attribute_values Row {idx + 2}: Missing item_id or attribute_name"
#                             )
#                             continue
#                         # Check if product exists
#                         product = existing_products.get(item_id)
#                         if not product:
#                             attributes_failed += 1
#                             errors.append(
#                                 f"Attribute_values Row {idx + 2}: Product with item_id '{item_id}' not found. "
#                                 "Make sure it exists in Products sheet."
#                             )
#                             continue
#                         try:
#                             attr_obj, created = ProductAttributeValue.objects.update_or_create(
#                                 product=product,
#                                 attribute_name=attribute_name,
#                                 defaults={'original_value': original_value}
#                             )
#                             if created:
#                                 attributes_created += 1
#                             else:
#                                 attributes_updated += 1
#                         except Exception as e:
#                             attributes_failed += 1
#                             errors.append(
#                                 f"Attribute_values Row {idx + 2} "
#                                 f"(item_id: {item_id}, attribute: {attribute_name}): {str(e)}"
#                             )
#             # Prepare response
#             response_data = {
#                 'message': 'Upload completed successfully',
#                 'products': {
#                     'created': products_created,
#                     'updated': products_updated,
#                     'failed': products_failed,
#                     'total_processed': products_created + products_updated + products_failed
#                 }
#             }
#             if has_attributes_sheet:
#                 response_data['attribute_values'] = {
#                     'created': attributes_created,
#                     'updated': attributes_updated,
#                     'failed': attributes_failed,
#                     'total_processed': attributes_created + attributes_updated + attributes_failed
#                 }
#             else:
#                 response_data['attribute_values'] = {
#                     'message': 'Attribute_values sheet not found in Excel file'
#                 }
#             if errors:
#                 response_data['errors'] = errors[:50]  # Limit to first 50 errors
#                 if len(errors) > 50:
#                     response_data['errors'].append(f"... and {len(errors) - 50} more errors")
#             # Determine status code
#             if products_failed > 0 or attributes_failed > 0:
#                 status_code = status.HTTP_207_MULTI_STATUS
#             else:
#                 status_code = status.HTTP_201_CREATED
#             return Response(response_data, status=status_code)
#         except pd.errors.EmptyDataError:
#             return Response({
#                 'error': 'The uploaded Excel file is empty or invalid'
#             }, status=status.HTTP_400_BAD_REQUEST)
#         except Exception as e:
#             return Response({
#                 'error': f'An error occurred while processing the file: {str(e)}'
#             }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
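
# A minimal way to build a workbook that the (commented-out) upload view above
# accepts (a sketch; the sheet and column names mirror the checks above, and
# the row values are made-up examples):
#
#   import pandas as pd
#   products = pd.DataFrame([{
#       "item_id": "SKU-1001",
#       "product_name": "Classic T-Shirt",
#       "product_short_description": "Cotton tee",
#       "product_long_description": "A soft, everyday cotton t-shirt.",
#       "product_type": "tshirt",
#       "image_path": SAMPLE_IMAGES["tshirt"],
#   }])
#   attrs = pd.DataFrame([{
#       "item_id": "SKU-1001",
#       "attribute_name": "Color",
#       "original_value": "White",
#   }])
#   with pd.ExcelWriter("products.xlsx") as writer:
#       products.to_excel(writer, sheet_name="Products", index=False)
#       attrs.to_excel(writer, sheet_name="Attribute_values", index=False)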

# import logging
# import json
# from rest_framework.views import APIView
# from rest_framework.response import Response
# from rest_framework import status
# from rest_framework.parsers import MultiPartParser, FormParser
# from django.db import transaction
# from django.db.models import Prefetch, F
# import pandas as pd
# # Import ALL your models
# from .models import Product, ProductAttributeValue, ProductType, ProductAttribute, AttributePossibleValue
# from .services import ProductAttributeService
# from .ocr_service import OCRService
# from .visual_processing_service import VisualProcessingService
# from openpyxl import Workbook
# from openpyxl.styles import Font, PatternFill, Alignment
# from django.conf import settings
# import os
# import threading
# from datetime import datetime
#
# # --- Logging Setup ---
# # Define log and status file paths in MEDIA_ROOT
# LOG_FILE_PATH = os.path.join(settings.MEDIA_ROOT, 'excel_generation.log')
# STATUS_FILE_PATH = os.path.join(settings.MEDIA_ROOT, 'excel_generation_status.json')
# # Ensure the MEDIA_ROOT exists for files to be saved
# if not os.path.exists(settings.MEDIA_ROOT):
#     os.makedirs(settings.MEDIA_ROOT)
# # Configure basic logging
# logging.basicConfig(
#     filename=LOG_FILE_PATH,
#     level=logging.INFO,
#     format='%(asctime)s - %(levelname)s - %(message)s'
# )
# logger = logging.getLogger(__name__)
#
# # -------------------------------------------------------------------------------------------------
# def generate_product_excel_background():
#     """
#     Performs batch attribute extraction for all products and generates an Excel file.
#     Runs in a background thread to avoid blocking the API response.
#     Logs success/failure and saves a status file for external monitoring.
#     """
#     logger.info(f"[{datetime.now().isoformat()}] Starting background product Excel generation and attribute extraction.")
#     successful = 0
#     failed = 0
#     results = []  # To store detailed extraction results for Excel sheet 2
#
#     # Function to write the status file (SUCCESS/FAILED)
#     def write_status(status_type, error_msg=None):
#         status_data = {
#             "status": status_type,
#             "timestamp": datetime.now().isoformat(),
#             "products_processed": successful + failed,
#             "products_successful": successful,
#             "products_failed": failed,
#             "excel_path": os.path.join(settings.MEDIA_URL, 'generated_products.xlsx') if status_type == "SUCCESS" else None,
#             "log_path": os.path.join(settings.MEDIA_URL, 'excel_generation.log'),
#             "error_message": error_msg
#         }
#         try:
#             with open(STATUS_FILE_PATH, 'w') as f:
#                 json.dump(status_data, f, indent=4)
#         except Exception as e:
#             logger.exception(f"CRITICAL ERROR: Failed to write status file at {STATUS_FILE_PATH}: {e}")
#
#     try:
#         # 1. PREFETCH all necessary related data to minimize database queries
#         # Prefetch possible values for mandatory attributes
#         possible_values_prefetch = Prefetch(
#             'attributes',
#             queryset=ProductAttribute.objects.filter(is_mandatory=True).prefetch_related('possible_values')
#         )
#         # Fetch all ProductTypes with their mandatory attributes and possible values
#         all_product_types = ProductType.objects.prefetch_related(possible_values_prefetch)
#         product_type_map = {
#             pt.name: pt for pt in all_product_types
#         }
#         # Prepare product_list for batch extraction
#         all_products = Product.objects.all()
#         product_list = []
#         for p in all_products:
#             # mandatory_attrs will be the dictionary required by the service
#             mandatory_attrs_dict = {}
#             product_type_name = p.product_type.strip() if p.product_type else None
#             if product_type_name and product_type_name in product_type_map:
#                 pt = product_type_map[product_type_name]
#                 # Build the mandatory_attrs dictionary: { "Attribute Name": ["Value 1", "Value 2"], ... }
#                 for attr in pt.attributes.all():  # .all() here works because we used Prefetch for 'attributes'
#                     # attr.possible_values.all() works because we used prefetch_related('possible_values')
#                     mandatory_attrs_dict[attr.name] = [
#                         pv.value for pv in attr.possible_values.all()
#                     ]
#             product_list.append({
#                 "item_id": p.item_id,
#                 "product_type_name": product_type_name,
#                 "mandatory_attrs": mandatory_attrs_dict  # <-- FIX: Pass the dictionary here
#             })
#
#         # Batch settings (using defaults)
#         model = "llama-3.1-8b-instant"
#         extract_additional = True
#         process_image = False
#         multiple = []
#         threshold_abs = 0.65
#         margin = 0.15
#         use_dynamic_thresholds = True
#         use_adaptive_margin = True
#         use_semantic_clustering = True
#
#         # Batch extraction logic
#         item_ids = [p['item_id'] for p in product_list]
#         products_queryset = Product.objects.filter(item_id__in=item_ids)
#         product_map = {product.item_id: product for product in products_queryset}
#         found_ids = set(product_map.keys())
#         for product_entry in product_list:
#             item_id = product_entry['item_id']
#             # FIX: mandatory_attrs is now correctly a dictionary (or an empty dictionary)
#             mandatory_attrs = product_entry['mandatory_attrs']
#             if item_id not in found_ids:
#                 failed += 1
#                 results.append({
#                     "product_id": item_id,
#                     "error": "Product not found in database"
#                 })
#                 logger.warning(f"Product {item_id} not found in database. Skipping extraction.")
#                 continue
#             product = product_map[item_id]
#             try:
#                 title = product.product_name
#                 short_desc = product.product_short_description
#                 long_desc = product.product_long_description
#                 image_url = product.image_path
#                 ocr_results = None
#                 ocr_text = None
#                 visual_results = None
#                 if process_image and image_url:
#                     logger.info(f"Processing image for product {item_id}...")
#                     # OCR Processing
#                     ocr_service = OCRService()
#                     ocr_results = ocr_service.process_image(image_url)
#                     if ocr_results and ocr_results.get("detected_text"):
#                         # NOTE: Assuming ProductAttributeService.extract_attributes_from_ocr exists
#                         ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
#                             ocr_results, model
#                         )
#                         ocr_results["extracted_attributes"] = ocr_attrs
#                         ocr_text = "\n".join([
#                             f"{item['text']} (confidence: {item['confidence']:.2f})"
#                             for item in ocr_results["detected_text"]
#                         ])
#                     # Visual Processing
#                     visual_service = VisualProcessingService()
#                     product_type_hint = product.product_type if product.product_type else None
#                     visual_results = visual_service.process_image(image_url, product_type_hint)
#                     if visual_results and visual_results.get('visual_attributes'):
#                         visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
#                             visual_results['visual_attributes']
#                         )
#                     logger.info(f"Image processing done for product {item_id}.")
#
#                 # Combine product text with source tracking
#                 product_text, source_map = ProductAttributeService.combine_product_text(
#                     title=title,
#                     short_desc=short_desc,
#                     long_desc=long_desc,
#                     ocr_text=ocr_text
#                 )
#                 # Attribute Extraction with source tracking
#                 extracted = ProductAttributeService.extract_attributes(
#                     product_text=product_text,
#                     mandatory_attrs=mandatory_attrs,  # <-- This is now the dictionary with possible values
#                     source_map=source_map,
#                     model=model,
#                     extract_additional=extract_additional,
#                     multiple=multiple,
#                     threshold_abs=threshold_abs,
#                     margin=margin,
#                     use_dynamic_thresholds=use_dynamic_thresholds,
#                     use_adaptive_margin=use_adaptive_margin,
#                     use_semantic_clustering=use_semantic_clustering
#                 )
#                 result = {
#                     "product_id": item_id,
#                     "mandatory": extracted.get("mandatory", {}),
#                     "additional": extracted.get("additional", {}),
#                 }
#                 if ocr_results:
#                     result["ocr_results"] = ocr_results
#                 if visual_results:
#                     result["visual_results"] = visual_results
#                 results.append(result)
#                 successful += 1
#                 logger.info(f"Attribute extraction successful for product {item_id}.")
#             except Exception as e:
#                 failed += 1
#                 results.append({
#                     "product_id": item_id,
#                     "error": str(e)
#                 })
#                 # Original Error: AttributeError: 'list' object has no attribute 'items'
#                 # This should now be fixed, but we keep the robust exception handling.
#                 logger.exception(f"Error during attribute extraction for product {item_id}.")
#
#         logger.info(f"Batch extraction phase complete. Successful: {successful}, Failed: {failed}")
#
#         # --------------------------------------------------------------------------------
#         # Generate and save the Excel file (Unchanged)
#         # --------------------------------------------------------------------------------
#         wb = Workbook()
#         # Sheet 1: Products (from DB)
#         ws_products = wb.active
#         ws_products.title = "Products"
#         products_headers = ['ITEM ID', 'PRODUCT NAME', 'PRODUCT TYPE', 'Product Short Description', 'Product Long Description', 'image_path']
#         header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
#         header_font = Font(bold=True, color="FFFFFF")
#         for col_num, header in enumerate(products_headers, 1):
#             cell = ws_products.cell(row=1, column=col_num)
#             cell.value = header
#             cell.fill = header_fill
#             cell.font = header_font
#             cell.alignment = Alignment(horizontal="center", vertical="center")
#         all_products_db = Product.objects.all()
#         for row_num, product in enumerate(all_products_db, 2):
#             ws_products.cell(row=row_num, column=1, value=product.item_id)
#             ws_products.cell(row=row_num, column=2, value=product.product_name)
#             ws_products.cell(row=row_num, column=3, value=product.product_type)
#             ws_products.cell(row=row_num, column=4, value=product.product_short_description)
#             ws_products.cell(row=row_num, column=5, value=product.product_long_description)
#             ws_products.cell(row=row_num, column=6, value=product.image_path)
#         # Adjust column widths
#         for col_dim, width in zip(['A', 'B', 'C', 'D', 'E', 'F'], [15, 25, 15, 35, 50, 45]):
#             ws_products.column_dimensions[col_dim].width = width
#
#         # Sheet 2: Attribute_values (item_id, attribute_name, original_value, generated_value)
#         ws_attributes = wb.create_sheet("Attribute_values")
#         attributes_headers = ['item_id', 'attribute_name', 'original_value', 'generated_value']
#         for col_num, header in enumerate(attributes_headers, 1):
#             cell = ws_attributes.cell(row=1, column=col_num)
#             cell.value = header
#             cell.fill = header_fill
#             cell.font = header_font
#             cell.alignment = Alignment(horizontal="center", vertical="center")
#         # Fetch all original attributes
#         row_num = 2
#         all_original_attrs = ProductAttributeValue.objects.all()
#         # Create a lookup for original attributes by item_id and attribute_name
#         original_attrs_lookup = {
#             (attr.product.item_id, attr.attribute_name): attr.original_value
#             for attr in all_original_attrs
#         }
#         # Add attributes (original and generated)
#         processed_original_keys = set()
#         for res in results:
#             item_id = res["product_id"]
#             if "error" in res:
#                 # Add existing original attributes for failed products to the sheet
#                 for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
#                     if orig_item_id == item_id:
#                         ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
#                         ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
#                         ws_attributes.cell(row=row_num, column=3, value=orig_value)
#                         ws_attributes.cell(row=row_num, column=4, value=f"Extraction Failed: {res['error']}")
#                         processed_original_keys.add((orig_item_id, orig_attr_name))
#                         row_num += 1
#                 continue
#             # Combine all generated attributes (mandatory, additional, OCR, visual)
#             generated_attrs = {}
#             for cat in ["mandatory", "additional"]:
#                 attrs = res.get(cat, {})
#                 for attr_name, values in attrs.items():
#                     for val in values:
#                         key = (item_id, attr_name)
#                         if key not in generated_attrs:
#                             generated_attrs[key] = []
#                         generated_attrs[key].append(f"{val['value']} (source: {val['source']})")
#             # OCR extracted
#             ocr = res.get("ocr_results")
#             if ocr and "extracted_attributes" in ocr and isinstance(ocr["extracted_attributes"], dict):
#                 for attr_name, values in ocr["extracted_attributes"].items():
#                     for val in values:
#                         key = (item_id, attr_name)
#                         if key not in generated_attrs:
#                             generated_attrs[key] = []
#                         generated_attrs[key].append(f"{val['value']} (source: {val['source']})")
#             # Visual extracted
#             visual = res.get("visual_results")
#             if visual and "visual_attributes" in visual:
#                 vis_attrs = visual["visual_attributes"]
#                 if isinstance(vis_attrs, dict):
#                     # Handle dict format where value might be a list of dicts or a single value
#                     for attr_name, values in vis_attrs.items():
#                         if not isinstance(values, list):
#                             values = [{"value": values, "source": "visual"}]
#                         for val in values:
#                             key = (item_id, attr_name)
#                             if key not in generated_attrs:
#                                 generated_attrs[key] = []
#                             generated_attrs[key].append(f"{val['value']} (source: {val.get('source', 'visual')})")
#                 elif isinstance(vis_attrs, list):
#                     # Handle list of dicts format
#                     for item in vis_attrs:
#                         attr_name = item.get("attribute_name") or item.get("name")
#                         if not attr_name:
#                             continue
#                         value = item.get("value", "")
#                         source = item.get("source", "visual")
#                         key = (item_id, attr_name)
#                         if key not in generated_attrs:
#                             generated_attrs[key] = []
#                         generated_attrs[key].append(f"{value} (source: {source})")
#             # Write attributes to Excel
#             for (attr_item_id, attr_name), gen_values in generated_attrs.items():
#                 # Get original value from lookup (if it exists)
#                 original_value = original_attrs_lookup.get((attr_item_id, attr_name), "")
#                 # Combine multiple generated values into a single string
#                 generated_value = "; ".join(gen_values) if gen_values else ""
#                 # Write row
#                 ws_attributes.cell(row=row_num, column=1, value=attr_item_id)
#                 ws_attributes.cell(row=row_num, column=2, value=attr_name)
#                 ws_attributes.cell(row=row_num, column=3, value=original_value)
#                 ws_attributes.cell(row=row_num, column=4, value=generated_value)
#                 processed_original_keys.add((attr_item_id, attr_name))
#                 row_num += 1
#             # Add original attributes that have no generated values for this item_id
#             for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
#                 if orig_item_id == item_id and (orig_item_id, orig_attr_name) not in processed_original_keys:
#                     ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
#                     ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
#                     ws_attributes.cell(row=row_num, column=3, value=orig_value)
#                     ws_attributes.cell(row=row_num, column=4, value="")  # No generated value
#                     processed_original_keys.add((orig_item_id, orig_attr_name))
#                     row_num += 1
#         # Add original attributes for products not included in the 'results' (e.g. if they didn't exist in product_list)
#         # We assume all products are in product_list, so this step might be redundant, but safe for completeness.
#         for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
#             if (orig_item_id, orig_attr_name) not in processed_original_keys:
#                 ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
#                 ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
#                 ws_attributes.cell(row=row_num, column=3, value=orig_value)
#                 ws_attributes.cell(row=row_num, column=4, value="Original value only (Product not processed in batch)")
#                 row_num += 1
#         # Adjust column widths for attributes
#         for col_dim, width in zip(['A', 'B', 'C', 'D'], [15, 35, 50, 50]):
#             ws_attributes.column_dimensions[col_dim].width = width
#
#         # Save the generated Excel (replace existing)
#         save_path = os.path.join(settings.MEDIA_ROOT, 'generated_products.xlsx')
#         wb.save(save_path)
#         logger.info(f"Excel file successfully saved to {save_path}")
#         # Write SUCCESS status
#         write_status("SUCCESS")
#         logger.info("Background task finished successfully.")
#     except Exception as e:
#         # Log the critical error and write FAILED status
#         logger.exception("CRITICAL ERROR during background Excel generation process.")
#         write_status("FAILED", error_msg=str(e))
#
# # -------------------------------------------------------------------------------------------------
  1005. # class ProductUploadExcelView(APIView):
  1006. # """
  1007. # POST API to upload an Excel file. (Unchanged)
  1008. # """
  1009. # parser_classes = (MultiPartParser, FormParser)
  1010. # def post(self, request, *args, **kwargs):
  1011. # file_obj = request.FILES.get('file')
  1012. # if not file_obj:
  1013. # return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)
  1014. # try:
  1015. # # Read all sheets from Excel file
  1016. # excel_file = pd.ExcelFile(file_obj)
  1017. # # Check if required sheets exist
  1018. # if 'Products' not in excel_file.sheet_names:
  1019. # logger.error(f"Upload failed: Missing 'Products' sheet in file.")
  1020. # return Response({
  1021. # 'error': "Missing 'Products' sheet",
  1022. # 'available_sheets': excel_file.sheet_names
  1023. # }, status=status.HTTP_400_BAD_REQUEST)
  1024. # # Read Products sheet
  1025. # df_products = pd.read_excel(excel_file, sheet_name='Products')
  1026. # df_products.columns = [c.strip().lower().replace(' ', '_') for c in df_products.columns]
  1027. # # Check required columns for Products
  1028. # expected_product_cols = {
  1029. # 'item_id', 'product_name', 'product_long_description',
  1030. # 'product_short_description', 'product_type', 'image_path'
  1031. # }
  1032. # if not expected_product_cols.issubset(df_products.columns):
  1033. # logger.error(f"Upload failed: Missing required columns in Products sheet.")
  1034. # return Response({
  1035. # 'error': 'Missing required columns in Products sheet',
  1036. # 'required_columns': list(expected_product_cols),
  1037. # 'found_columns': list(df_products.columns)
  1038. # }, status=status.HTTP_400_BAD_REQUEST)
  1039. # # Read Attribute_values sheet if it exists
  1040. # df_attributes = None
  1041. # has_attributes_sheet = 'Attribute_values' in excel_file.sheet_names
  1042. # if has_attributes_sheet:
  1043. # df_attributes = pd.read_excel(excel_file, sheet_name='Attribute_values')
  1044. # df_attributes.columns = [c.strip().lower().replace(' ', '_') for c in df_attributes.columns]
  1045. # # Check required columns for Attribute_values
  1046. # expected_attr_cols = {'item_id', 'attribute_name', 'original_value'}
  1047. # if not expected_attr_cols.issubset(df_attributes.columns):
  1048. # logger.error(f"Upload failed: Missing required columns in Attribute_values sheet.")
  1049. # return Response({
  1050. # 'error': 'Missing required columns in Attribute_values sheet',
  1051. # 'required_columns': list(expected_attr_cols),
  1052. # 'found_columns': list(df_attributes.columns)
  1053. # }, status=status.HTTP_400_BAD_REQUEST)
  1054. # # Initialize counters
  1055. # products_created = 0
  1056. # products_updated = 0
  1057. # attributes_created = 0
  1058. # attributes_updated = 0
  1059. # products_failed = 0
  1060. # attributes_failed = 0
  1061. # errors = []
  1062. # # Use transaction to ensure atomicity
  1063. # with transaction.atomic():
  1064. # # Process Products sheet
  1065. # for idx, row in df_products.iterrows():
  1066. # item_id = str(row.get('item_id', '')).strip()
  1067. # product_type = str(row.get('product_type', '')).strip()
  1068. # if not item_id:
  1069. # products_failed += 1
  1070. # errors.append(f"Products Row {idx + 2}: Missing item_id")
  1071. # continue
  1072. # try:
  1073. # # Auto-create ProductType if provided and doesn't exist
  1074. # if product_type:
  1075. # ProductType.objects.get_or_create(name=product_type)
  1076. # defaults = {
  1077. # 'product_name': str(row.get('product_name', '')),
  1078. # 'product_long_description': str(row.get('product_long_description', '')),
  1079. # 'product_short_description': str(row.get('product_short_description', '')),
  1080. # 'product_type': product_type,
  1081. # 'image_path': str(row.get('image_path', '')),
  1082. # }
  1083. # obj, created = Product.objects.update_or_create(
  1084. # item_id=item_id,
  1085. # defaults=defaults
  1086. # )
  1087. # if created:
  1088. # products_created += 1
  1089. # else:
  1090. # products_updated += 1
  1091. # except Exception as e:
  1092. # products_failed += 1
  1093. # errors.append(f"Products Row {idx + 2} (item_id: {item_id}): {str(e)}")
  1094. # logger.error(f"Error processing product {item_id} in Products sheet: {e}")
  1095. # # Process Attribute_values sheet if it exists
  1096. # if has_attributes_sheet and df_attributes is not None:
  1097. # # Group by item_id to optimize lookups
  1098. # item_ids_in_attrs = df_attributes['item_id'].astype(str).unique()
  1099. # # Fetch all products at once
  1100. # existing_products = {
  1101. # p.item_id: p
  1102. # for p in Product.objects.filter(item_id__in=item_ids_in_attrs)
  1103. # }
  1104. # for idx, row in df_attributes.iterrows():
  1105. # item_id = str(row.get('item_id', '')).strip()
  1106. # attribute_name = str(row.get('attribute_name', '')).strip()
  1107. # original_value = str(row.get('original_value', '')).strip()
  1108. # if not item_id or not attribute_name:
  1109. # attributes_failed += 1
  1110. # errors.append(
  1111. # f"Attribute_values Row {idx + 2}: Missing item_id or attribute_name"
  1112. # )
  1113. # continue
  1114. # # Check if product exists
  1115. # product = existing_products.get(item_id)
  1116. # if not product:
  1117. # attributes_failed += 1
  1118. # errors.append(
  1119. # f"Attribute_values Row {idx + 2}: Product with item_id '{item_id}' not found. "
  1120. # "Make sure it exists in Products sheet."
  1121. # )
  1122. # continue
  1123. # try:
  1124. # attr_obj, created = ProductAttributeValue.objects.update_or_create(
  1125. # product=product,
  1126. # attribute_name=attribute_name,
  1127. # defaults={'original_value': original_value}
  1128. # )
  1129. # if created:
  1130. # attributes_created += 1
  1131. # else:
  1132. # attributes_updated += 1
  1133. # except Exception as e:
  1134. # attributes_failed += 1
  1135. # errors.append(
  1136. # f"Attribute_values Row {idx + 2} "
  1137. # f"(item_id: {item_id}, attribute: {attribute_name}): {str(e)}"
  1138. # )
  1139. # logger.error(f"Error processing attribute {attribute_name} for product {item_id}: {e}")
  1140. # # Prepare response data
  1141. # response_data = {
  1142. # 'message': 'Upload completed',
  1143. # 'products': {
  1144. # 'created': products_created,
  1145. # 'updated': products_updated,
  1146. # 'failed': products_failed,
  1147. # 'total_processed': products_created + products_updated + products_failed
  1148. # },
  1149. # 'attribute_values': {
  1150. # 'created': attributes_created,
  1151. # 'updated': attributes_updated,
  1152. # 'failed': attributes_failed,
  1153. # 'total_processed': attributes_created + attributes_updated + attributes_failed
  1154. # } if has_attributes_sheet else {'message': 'Attribute_values sheet not found in Excel file'},
  1155. # 'generated_excel_status': 'Excel generation started in the background.'
  1156. # }
  1157. # if errors:
  1158. # response_data['errors'] = errors[:50]
  1159. # if len(errors) > 50:
  1160. # response_data['errors'].append(f"... and {len(errors) - 50} more errors")
  1161. # # Determine status code for upload
  1162. # upload_status = status.HTTP_201_CREATED if products_failed == 0 and attributes_failed == 0 else status.HTTP_207_MULTI_STATUS
  1163. # # Start background thread for Excel generation if upload was successful
  1164. # if products_failed == 0 and attributes_failed == 0:
  1165. # logger.info("API call successful. Triggering background Excel generation thread.")
  1166. # threading.Thread(target=generate_product_excel_background, daemon=True).start()
  1167. # # Update response to provide monitoring paths
  1168. # response_data['generated_excel_status'] = 'Background Excel generation triggered successfully.'
  1169. # response_data['monitoring'] = {
  1170. # 'excel_file': os.path.join(settings.MEDIA_URL, 'generated_products.xlsx'),
  1171. # 'status_file': os.path.join(settings.MEDIA_URL, 'excel_generation_status.json'),
  1172. # 'log_file': os.path.join(settings.MEDIA_URL, 'excel_generation.log'),
  1173. # 'note': 'These files will be available once the background process completes.'
  1174. # }
  1175. # else:
  1176. # logger.warning(f"API call finished with errors ({products_failed} products, {attributes_failed} attributes). Not triggering background excel generation.")
  1177. # response_data['generated_excel_status'] = 'Background Excel generation was NOT triggered due to upload errors. Fix upload errors and re-upload.'
  1178. # return Response(response_data, status=upload_status)
  1179. # except pd.errors.EmptyDataError:
  1180. # logger.error('The uploaded Excel file is empty or invalid.')
  1181. # return Response({
  1182. # 'error': 'The uploaded Excel file is empty or invalid'
  1183. # }, status=status.HTTP_400_BAD_REQUEST)
  1184. # except Exception as e:
  1185. # logger.exception(f'An unexpected error occurred while processing the file.')
  1186. # return Response({
  1187. # 'error': f'An unexpected error occurred while processing the file: {str(e)}'
  1188. # }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
import logging
import json
import os
import threading
from datetime import datetime

import pandas as pd
from django.conf import settings
from django.db import transaction
from django.db.models import Prefetch
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status
from rest_framework.parsers import MultiPartParser, FormParser

# Import ALL your models
from .models import Product, ProductAttributeValue, ProductType, ProductAttribute, AttributePossibleValue
from .serializers import ProductTypeSerializer  # needed by ProductTypeAttributesView.get below
from .services import ProductAttributeService
from .ocr_service import OCRService
from .visual_processing_service import VisualProcessingService

# --- Configuration for Generated Outputs Folder ---
OUTPUT_FOLDER_NAME = 'generated_outputs'
OUTPUT_ROOT = os.path.join(settings.MEDIA_ROOT, OUTPUT_FOLDER_NAME)
# Build the public URL with forward slashes so it stays valid on Windows as well
OUTPUT_URL = os.path.join(settings.MEDIA_URL, OUTPUT_FOLDER_NAME).replace('\\', '/')

# Log, status, and Excel file paths within the generated_outputs subfolder
LOG_FILE_NAME = 'excel_generation.log'
STATUS_FILE_NAME = 'excel_generation_status.json'
EXCEL_FILE_NAME = 'generated_products.xlsx'
LOG_FILE_PATH = os.path.join(OUTPUT_ROOT, LOG_FILE_NAME)
STATUS_FILE_PATH = os.path.join(OUTPUT_ROOT, STATUS_FILE_NAME)
EXCEL_FILE_PATH = os.path.join(OUTPUT_ROOT, EXCEL_FILE_NAME)

# Ensure OUTPUT_ROOT exists before any file is written (exist_ok avoids a race)
os.makedirs(OUTPUT_ROOT, exist_ok=True)

# Configure basic logging to the new path
logging.basicConfig(
    filename=LOG_FILE_PATH,
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
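
# Illustrative resulting layout (assuming MEDIA_ROOT = '/srv/app/media'; the
# actual prefix comes from your settings):
#   /srv/app/media/generated_outputs/excel_generation.log
#   /srv/app/media/generated_outputs/excel_generation_status.json
#   /srv/app/media/generated_outputs/generated_products.xlsx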
# -------------------------------------------------------------------------------------------------
def generate_product_excel_background():
    """
    Perform batch attribute extraction for all products and generate an Excel file.
    Runs in a background thread to avoid blocking the API response.
    Logs success/failure and saves a status file for external monitoring.
    """
    logger.info(f"[{datetime.now().isoformat()}] Starting background product Excel generation and attribute extraction.")
    successful = 0
    failed = 0
    results = []  # To store detailed extraction results for Excel sheet 2

    # Helper to write the status file (SUCCESS/FAILED)
    def write_status(status_type, error_msg=None):
        status_data = {
            "status": status_type,
            "timestamp": datetime.now().isoformat(),
            "products_processed": successful + failed,
            "products_successful": successful,
            "products_failed": failed,
            # Join URL parts with '/' so they stay valid on Windows as well
            "excel_path": f"{OUTPUT_URL}/{EXCEL_FILE_NAME}" if status_type == "SUCCESS" else None,
            "log_path": f"{OUTPUT_URL}/{LOG_FILE_NAME}",
            "error_message": error_msg
        }
        try:
            # STATUS_FILE_PATH lives inside generated_outputs
            with open(STATUS_FILE_PATH, 'w') as f:
                json.dump(status_data, f, indent=4)
        except Exception as e:
            logger.exception(f"CRITICAL ERROR: Failed to write status file at {STATUS_FILE_PATH}: {e}")
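
    # Illustrative status file contents after a successful run (assuming
    # MEDIA_URL = '/media/'; timestamp and counts will vary):
    # {
    #     "status": "SUCCESS",
    #     "timestamp": "2024-01-01T12:00:00",
    #     "products_processed": 3,
    #     "products_successful": 3,
    #     "products_failed": 0,
    #     "excel_path": "/media/generated_outputs/generated_products.xlsx",
    #     "log_path": "/media/generated_outputs/excel_generation.log",
    #     "error_message": null
    # }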
    try:
        # 1. PREFETCH all necessary related data to minimize database queries
        # Prefetch possible values for mandatory attributes
        possible_values_prefetch = Prefetch(
            'attributes',
            queryset=ProductAttribute.objects.filter(is_mandatory=True).prefetch_related('possible_values')
        )
        # Fetch all ProductTypes with their mandatory attributes and possible values
        all_product_types = ProductType.objects.prefetch_related(possible_values_prefetch)
        product_type_map = {pt.name: pt for pt in all_product_types}

        # Prepare product_list for batch extraction
        all_products = Product.objects.all()
        product_list = []
        for p in all_products:
            mandatory_attrs_dict = {}
            product_type_name = p.product_type.strip() if p.product_type else None
            if product_type_name and product_type_name in product_type_map:
                pt = product_type_map[product_type_name]
                # Build the mandatory_attrs dictionary: { "Attribute Name": ["Value 1", "Value 2"], ... }
                for attr in pt.attributes.all():
                    mandatory_attrs_dict[attr.name] = [
                        pv.value for pv in attr.possible_values.all()
                    ]
            product_list.append({
                "item_id": p.item_id,
                "product_type_name": product_type_name,
                "mandatory_attrs": mandatory_attrs_dict
            })
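
        # Illustrative product_list entry (the attribute names and possible values
        # shown here are made-up examples, not fixed by the code):
        # {
        #     "item_id": "3217373735",
        #     "product_type_name": "Clothing",
        #     "mandatory_attrs": {"Color": ["Blue", "Red"], "Material": ["Cotton", "Polyester"]}
        # }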
        # Batch settings (using defaults)
        model = "llama-3.1-8b-instant"
        extract_additional = True
        process_image = False
        multiple = []
        threshold_abs = 0.65
        margin = 0.15
        use_dynamic_thresholds = True
        use_adaptive_margin = True
        use_semantic_clustering = True

        # Batch extraction logic
        item_ids = [p['item_id'] for p in product_list]
        products_queryset = Product.objects.filter(item_id__in=item_ids)
        product_map = {product.item_id: product for product in products_queryset}
        found_ids = set(product_map.keys())
        for product_entry in product_list:
            item_id = product_entry['item_id']
            mandatory_attrs = product_entry['mandatory_attrs']
            if item_id not in found_ids:
                failed += 1
                results.append({
                    "product_id": item_id,
                    "error": "Product not found in database"
                })
                logger.warning(f"Product {item_id} not found in database. Skipping extraction.")
                continue
            product = product_map[item_id]
            try:
                title = product.product_name
                short_desc = product.product_short_description
                long_desc = product.product_long_description
                image_url = product.image_path
                ocr_results = None
                ocr_text = None
                visual_results = None
                if process_image and image_url:
                    logger.info(f"Processing image for product {item_id}...")
                    # OCR Processing
                    ocr_service = OCRService()
                    ocr_results = ocr_service.process_image(image_url)
                    if ocr_results and ocr_results.get("detected_text"):
                        ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
                            ocr_results, model
                        )
                        ocr_results["extracted_attributes"] = ocr_attrs
                        ocr_text = "\n".join([
                            f"{item['text']} (confidence: {item['confidence']:.2f})"
                            for item in ocr_results["detected_text"]
                        ])
                    # Visual Processing
                    visual_service = VisualProcessingService()
                    product_type_hint = product.product_type if product.product_type else None
                    visual_results = visual_service.process_image(image_url, product_type_hint)
                    if visual_results and visual_results.get('visual_attributes'):
                        visual_results['visual_attributes'] = ProductAttributeService.format_visual_attributes(
                            visual_results['visual_attributes']
                        )
                    logger.info(f"Image processing done for product {item_id}.")
                # Combine product text with source tracking
                product_text, source_map = ProductAttributeService.combine_product_text(
                    title=title,
                    short_desc=short_desc,
                    long_desc=long_desc,
                    ocr_text=ocr_text
                )
                # Attribute Extraction with source tracking
                extracted = ProductAttributeService.extract_attributes(
                    product_text=product_text,
                    mandatory_attrs=mandatory_attrs,
                    source_map=source_map,
                    model=model,
                    extract_additional=extract_additional,
                    multiple=multiple,
                    threshold_abs=threshold_abs,
                    margin=margin,
                    use_dynamic_thresholds=use_dynamic_thresholds,
                    use_adaptive_margin=use_adaptive_margin,
                    use_semantic_clustering=use_semantic_clustering
                )
                result = {
                    "product_id": item_id,
                    "mandatory": extracted.get("mandatory", {}),
                    "additional": extracted.get("additional", {}),
                }
                if ocr_results:
                    result["ocr_results"] = ocr_results
                if visual_results:
                    result["visual_results"] = visual_results
                results.append(result)
                successful += 1
                logger.info(f"Attribute extraction successful for product {item_id}.")
            except Exception as e:
                failed += 1
                results.append({
                    "product_id": item_id,
                    "error": str(e)
                })
                logger.exception(f"Error during attribute extraction for product {item_id}.")
        logger.info(f"Batch extraction phase complete. Successful: {successful}, Failed: {failed}")
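
        # Illustrative shape of a successful `results` entry. The exact value/source
        # structure is produced by ProductAttributeService, so treat this as an
        # assumption about its output rather than a guarantee:
        # {
        #     "product_id": "3217373735",
        #     "mandatory": {"Color": [{"value": "Blue", "source": "title"}]},
        #     "additional": {"Pattern": [{"value": "Solid", "source": "long_desc"}]}
        # }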
        # --------------------------------------------------------------------------------
        # Generate and save the Excel file
        # --------------------------------------------------------------------------------
        wb = Workbook()
        # Sheet 1: Products (from DB)
        ws_products = wb.active
        ws_products.title = "Products"
        products_headers = ['ITEM ID', 'PRODUCT NAME', 'PRODUCT TYPE', 'Product Short Description', 'Product Long Description', 'image_path']
        header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
        header_font = Font(bold=True, color="FFFFFF")
        for col_num, header in enumerate(products_headers, 1):
            cell = ws_products.cell(row=1, column=col_num)
            cell.value = header
            cell.fill = header_fill
            cell.font = header_font
            cell.alignment = Alignment(horizontal="center", vertical="center")
        all_products_db = Product.objects.all()
        for row_num, product in enumerate(all_products_db, 2):
            ws_products.cell(row=row_num, column=1, value=product.item_id)
            ws_products.cell(row=row_num, column=2, value=product.product_name)
            ws_products.cell(row=row_num, column=3, value=product.product_type)
            ws_products.cell(row=row_num, column=4, value=product.product_short_description)
            ws_products.cell(row=row_num, column=5, value=product.product_long_description)
            ws_products.cell(row=row_num, column=6, value=product.image_path)
        for col_dim, width in zip(['A', 'B', 'C', 'D', 'E', 'F'], [15, 25, 15, 35, 50, 45]):
            ws_products.column_dimensions[col_dim].width = width
        # Sheet 2: Attribute_values
        ws_attributes = wb.create_sheet("Attribute_values")
        attributes_headers = ['item_id', 'attribute_name', 'original_value', 'generated_value']
        for col_num, header in enumerate(attributes_headers, 1):
            cell = ws_attributes.cell(row=1, column=col_num)
            cell.value = header
            cell.fill = header_fill
            cell.font = header_font
            cell.alignment = Alignment(horizontal="center", vertical="center")
        row_num = 2
        all_original_attrs = ProductAttributeValue.objects.all()
        original_attrs_lookup = {
            (attr.product.item_id, attr.attribute_name): attr.original_value
            for attr in all_original_attrs
        }
        processed_original_keys = set()
        for res in results:
            item_id = res["product_id"]
            if "error" in res:
                # Extraction failed: still emit the original values, flagged with the error
                for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
                    if orig_item_id == item_id:
                        ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
                        ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
                        ws_attributes.cell(row=row_num, column=3, value=orig_value)
                        ws_attributes.cell(row=row_num, column=4, value=f"Extraction Failed: {res['error']}")
                        processed_original_keys.add((orig_item_id, orig_attr_name))
                        row_num += 1
                continue
            # Aggregate generated values per (item_id, attribute_name) across text, OCR, and visual sources
            generated_attrs = {}
            for cat in ["mandatory", "additional"]:
                attrs = res.get(cat, {})
                for attr_name, values in attrs.items():
                    for val in values:
                        key = (item_id, attr_name)
                        if key not in generated_attrs:
                            generated_attrs[key] = []
                        generated_attrs[key].append(f"{val['value']} (source: {val['source']})")
            ocr = res.get("ocr_results")
            if ocr and "extracted_attributes" in ocr and isinstance(ocr["extracted_attributes"], dict):
                for attr_name, values in ocr["extracted_attributes"].items():
                    for val in values:
                        key = (item_id, attr_name)
                        if key not in generated_attrs:
                            generated_attrs[key] = []
                        generated_attrs[key].append(f"{val['value']} (source: {val['source']})")
            visual = res.get("visual_results")
            if visual and "visual_attributes" in visual:
                vis_attrs = visual["visual_attributes"]
                if isinstance(vis_attrs, dict):
                    for attr_name, values in vis_attrs.items():
                        if not isinstance(values, list):
                            values = [{"value": values, "source": "visual"}]
                        for val in values:
                            key = (item_id, attr_name)
                            if key not in generated_attrs:
                                generated_attrs[key] = []
                            generated_attrs[key].append(f"{val['value']} (source: {val.get('source', 'visual')})")
                elif isinstance(vis_attrs, list):
                    # Handle list-of-dicts format
                    for item in vis_attrs:
                        attr_name = item.get("attribute_name") or item.get("name")
                        if not attr_name:
                            continue
                        value = item.get("value", "")
                        source = item.get("source", "visual")
                        key = (item_id, attr_name)
                        if key not in generated_attrs:
                            generated_attrs[key] = []
                        generated_attrs[key].append(f"{value} (source: {source})")
            # Write generated attributes (with the original value alongside, when one exists)
            for (attr_item_id, attr_name), gen_values in generated_attrs.items():
                original_value = original_attrs_lookup.get((attr_item_id, attr_name), "")
                generated_value = "; ".join(gen_values) if gen_values else ""
                ws_attributes.cell(row=row_num, column=1, value=attr_item_id)
                ws_attributes.cell(row=row_num, column=2, value=attr_name)
                ws_attributes.cell(row=row_num, column=3, value=original_value)
                ws_attributes.cell(row=row_num, column=4, value=generated_value)
                processed_original_keys.add((attr_item_id, attr_name))
                row_num += 1
            # Add original attributes that have no generated values for this item_id
            for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
                if orig_item_id == item_id and (orig_item_id, orig_attr_name) not in processed_original_keys:
                    ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
                    ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
                    ws_attributes.cell(row=row_num, column=3, value=orig_value)
                    ws_attributes.cell(row=row_num, column=4, value="")
                    processed_original_keys.add((orig_item_id, orig_attr_name))
                    row_num += 1
        # Add original attributes for products that were not processed in this batch
        for (orig_item_id, orig_attr_name), orig_value in original_attrs_lookup.items():
            if (orig_item_id, orig_attr_name) not in processed_original_keys:
                ws_attributes.cell(row=row_num, column=1, value=orig_item_id)
                ws_attributes.cell(row=row_num, column=2, value=orig_attr_name)
                ws_attributes.cell(row=row_num, column=3, value=orig_value)
                ws_attributes.cell(row=row_num, column=4, value="Original value only (Product not processed in batch)")
                row_num += 1
        # Adjust column widths for attributes
        for col_dim, width in zip(['A', 'B', 'C', 'D'], [15, 35, 50, 50]):
            ws_attributes.column_dimensions[col_dim].width = width
        # Save the generated Excel to EXCEL_FILE_PATH inside generated_outputs (replaces any existing file)
        wb.save(EXCEL_FILE_PATH)
        logger.info(f"Excel file successfully saved to {EXCEL_FILE_PATH}")
        # Write SUCCESS status
        write_status("SUCCESS")
        logger.info("Background task finished successfully.")
    except Exception as e:
        # Log the critical error and write FAILED status
        logger.exception("CRITICAL ERROR during background Excel generation process.")
        write_status("FAILED", error_msg=str(e))
# -------------------------------------------------------------------------------------------------
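
# A minimal client-side sketch for monitoring the background job: poll the status
# JSON until it reports SUCCESS or FAILED. The base URL is an assumption; it depends
# on where MEDIA files are served in your deployment.
#
#   import time, requests
#   while True:
#       resp = requests.get("http://localhost:8000/media/generated_outputs/excel_generation_status.json")
#       if resp.ok and resp.json()["status"] in ("SUCCESS", "FAILED"):
#           print(resp.json())
#           break
#       time.sleep(5)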
class ProductUploadExcelView(APIView):
    """
    POST API to upload an Excel file containing a 'Products' sheet and an
    optional 'Attribute_values' sheet.
    """
    parser_classes = (MultiPartParser, FormParser)

    def post(self, request, *args, **kwargs):
        file_obj = request.FILES.get('file')
        if not file_obj:
            return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)
        try:
            # Read all sheets from the Excel file
            excel_file = pd.ExcelFile(file_obj)
            # Check that the required sheet exists
            if 'Products' not in excel_file.sheet_names:
                logger.error("Upload failed: Missing 'Products' sheet in file.")
                return Response({
                    'error': "Missing 'Products' sheet",
                    'available_sheets': excel_file.sheet_names
                }, status=status.HTTP_400_BAD_REQUEST)
            df_products = pd.read_excel(excel_file, sheet_name='Products')
            df_products.columns = [c.strip().lower().replace(' ', '_') for c in df_products.columns]
            expected_product_cols = {
                'item_id', 'product_name', 'product_long_description',
                'product_short_description', 'product_type', 'image_path'
            }
            if not expected_product_cols.issubset(df_products.columns):
                logger.error("Upload failed: Missing required columns in Products sheet.")
                return Response({
                    'error': 'Missing required columns in Products sheet',
                    'required_columns': list(expected_product_cols),
                    'found_columns': list(df_products.columns)
                }, status=status.HTTP_400_BAD_REQUEST)
            df_attributes = None
            has_attributes_sheet = 'Attribute_values' in excel_file.sheet_names
            if has_attributes_sheet:
                df_attributes = pd.read_excel(excel_file, sheet_name='Attribute_values')
                df_attributes.columns = [c.strip().lower().replace(' ', '_') for c in df_attributes.columns]
                expected_attr_cols = {'item_id', 'attribute_name', 'original_value'}
                if not expected_attr_cols.issubset(df_attributes.columns):
                    logger.error("Upload failed: Missing required columns in Attribute_values sheet.")
                    return Response({
                        'error': 'Missing required columns in Attribute_values sheet',
                        'required_columns': list(expected_attr_cols),
                        'found_columns': list(df_attributes.columns)
                    }, status=status.HTTP_400_BAD_REQUEST)
            products_created = 0
            products_updated = 0
            attributes_created = 0
            attributes_updated = 0
            products_failed = 0
            attributes_failed = 0
            errors = []
            with transaction.atomic():
                # Process the Products sheet
                for idx, row in df_products.iterrows():
                    item_id = str(row.get('item_id', '')).strip()
                    product_type = str(row.get('product_type', '')).strip()
                    if not item_id:
                        products_failed += 1
                        errors.append(f"Products Row {idx + 2}: Missing item_id")
                        continue
                    try:
                        # Auto-create the ProductType if provided and it doesn't exist yet
                        if product_type:
                            ProductType.objects.get_or_create(name=product_type)
                        defaults = {
                            'product_name': str(row.get('product_name', '')),
                            'product_long_description': str(row.get('product_long_description', '')),
                            'product_short_description': str(row.get('product_short_description', '')),
                            'product_type': product_type,
                            'image_path': str(row.get('image_path', '')),
                        }
                        obj, created = Product.objects.update_or_create(item_id=item_id, defaults=defaults)
                        if created:
                            products_created += 1
                        else:
                            products_updated += 1
                    except Exception as e:
                        products_failed += 1
                        errors.append(f"Products Row {idx + 2} (item_id: {item_id}): {str(e)}")
                        logger.error(f"Error processing product {item_id} in Products sheet: {e}")
                # Process the Attribute_values sheet if it exists
                if has_attributes_sheet and df_attributes is not None:
                    # Fetch all referenced products at once; strip the ids so they
                    # match the per-row handling below
                    item_ids_in_attrs = df_attributes['item_id'].astype(str).str.strip().unique()
                    existing_products = {p.item_id: p for p in Product.objects.filter(item_id__in=item_ids_in_attrs)}
                    for idx, row in df_attributes.iterrows():
                        item_id = str(row.get('item_id', '')).strip()
                        attribute_name = str(row.get('attribute_name', '')).strip()
                        original_value = str(row.get('original_value', '')).strip()
                        if not item_id or not attribute_name:
                            attributes_failed += 1
                            errors.append(f"Attribute_values Row {idx + 2}: Missing item_id or attribute_name")
                            continue
                        product = existing_products.get(item_id)
                        if not product:
                            attributes_failed += 1
                            errors.append(f"Attribute_values Row {idx + 2}: Product with item_id '{item_id}' not found. Make sure it exists in Products sheet.")
                            continue
                        try:
                            attr_obj, created = ProductAttributeValue.objects.update_or_create(
                                product=product,
                                attribute_name=attribute_name,
                                defaults={'original_value': original_value}
                            )
                            if created:
                                attributes_created += 1
                            else:
                                attributes_updated += 1
                        except Exception as e:
                            attributes_failed += 1
                            errors.append(f"Attribute_values Row {idx + 2} (item_id: {item_id}, attribute: {attribute_name}): {str(e)}")
                            logger.error(f"Error processing attribute {attribute_name} for product {item_id}: {e}")
            # Prepare response data
            response_data = {
                'message': 'Upload completed',
                'products': {
                    'created': products_created, 'updated': products_updated, 'failed': products_failed,
                    'total_processed': products_created + products_updated + products_failed
                },
                'attribute_values': {
                    'created': attributes_created, 'updated': attributes_updated, 'failed': attributes_failed,
                    'total_processed': attributes_created + attributes_updated + attributes_failed
                } if has_attributes_sheet else {'message': 'Attribute_values sheet not found in Excel file'},
                'generated_excel_status': 'Excel generation started in the background.'
            }
            if errors:
                response_data['errors'] = errors[:50]
                if len(errors) > 50:
                    response_data['errors'].append(f"... and {len(errors) - 50} more errors")
            upload_status = status.HTTP_201_CREATED if products_failed == 0 and attributes_failed == 0 else status.HTTP_207_MULTI_STATUS
            # Start the background Excel generation thread only if the upload was clean
            if products_failed == 0 and attributes_failed == 0:
                logger.info("API call successful. Triggering background Excel generation thread.")
                threading.Thread(target=generate_product_excel_background, daemon=True).start()
                # Monitoring URLs point at the generated_outputs subfolder; join with
                # '/' so the URLs stay valid on Windows as well
                response_data['generated_excel_status'] = 'Background Excel generation triggered successfully.'
                response_data['monitoring'] = {
                    'excel_file': f"{OUTPUT_URL}/{EXCEL_FILE_NAME}",
                    'status_file': f"{OUTPUT_URL}/{STATUS_FILE_NAME}",
                    'log_file': f"{OUTPUT_URL}/{LOG_FILE_NAME}",
                    'note': 'These files will be available once the background process completes.'
                }
            else:
                logger.warning(f"API call finished with errors ({products_failed} products, {attributes_failed} attributes). Not triggering background Excel generation.")
                response_data['generated_excel_status'] = 'Background Excel generation was NOT triggered due to upload errors. Fix upload errors and re-upload.'
            return Response(response_data, status=upload_status)
        except pd.errors.EmptyDataError:
            logger.error('The uploaded Excel file is empty or invalid.')
            return Response({'error': 'The uploaded Excel file is empty or invalid'}, status=status.HTTP_400_BAD_REQUEST)
        except Exception as e:
            logger.exception('An unexpected error occurred while processing the file.')
            return Response({'error': f'An unexpected error occurred while processing the file: {str(e)}'}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
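
# Illustrative upload request for the view above (the endpoint path is an
# assumption; use whatever route this view is mapped to in urls.py):
#
#   curl -X POST -F "file=@product_upload_template.xlsx" \
#        http://localhost:8000/api/products/upload-excel/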
# --- Template download views ---
from django.http import HttpResponse
import io

class DownloadExcelTemplateView(APIView):
    """
    GET API to download an Excel template with two sheets:
    1. Products sheet with sample data
    2. Attribute_values sheet with sample data
    """
    def get(self, request):
        # Create a new workbook
        wb = Workbook()
        # Remove the default sheet
        if 'Sheet' in wb.sheetnames:
            wb.remove(wb['Sheet'])
        # ===== Create Products Sheet =====
        ws_products = wb.create_sheet("Products", 0)
        # Define headers for Products
        products_headers = [
            'ITEM ID',
            'PRODUCT NAME',
            'PRODUCT TYPE',
            'Product Short Description',
            'Product Long Description',
            'image_path'
        ]
        # Style for headers
        header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
        header_font = Font(bold=True, color="FFFFFF")
        # Add headers to Products sheet
        for col_num, header in enumerate(products_headers, 1):
            cell = ws_products.cell(row=1, column=col_num)
            cell.value = header
            cell.fill = header_fill
            cell.font = header_font
            cell.alignment = Alignment(horizontal="center", vertical="center")
        # Add sample data to Products sheet
        sample_products = [
            [
                '3217373735',
                'Blue V-Neck T-Shirt',
                'Clothing',
                'Stylish blue t-shirt with v-neck design',
                'Premium quality cotton t-shirt featuring a classic v-neck design. Perfect for casual wear. Available in vibrant blue color.',
                'https://images.unsplash.com/photo-1521572163474-6864f9cf17ab'
            ],
            [
                '1234567890',
                'Red Cotton Dress',
                'Clothing',
                'Beautiful red dress for special occasions',
                'Elegant red dress made from 100% cotton fabric. Features a flowing design perfect for summer events and parties.',
                'https://images.unsplash.com/photo-1595777457583-95e059d581b8'
            ],
            [
                '9876543210',
                'Steel Screws Pack',
                'Hardware',
                'Pack of zinc plated steel screws',
                'Professional grade steel screws with zinc plating for corrosion resistance. Pack contains 50 pieces, 2 inch length, M6 thread size.',
                'https://images.unsplash.com/photo-1542272604-787c3835535d'
            ]
        ]
        for row_num, row_data in enumerate(sample_products, 2):
            for col_num, value in enumerate(row_data, 1):
                ws_products.cell(row=row_num, column=col_num, value=value)
        # Adjust column widths for Products sheet
        ws_products.column_dimensions['A'].width = 15  # ITEM ID
        ws_products.column_dimensions['B'].width = 25  # PRODUCT NAME
        ws_products.column_dimensions['C'].width = 15  # PRODUCT TYPE
        ws_products.column_dimensions['D'].width = 35  # Short Description
        ws_products.column_dimensions['E'].width = 50  # Long Description
        ws_products.column_dimensions['F'].width = 45  # image_path
        # ===== Create Attribute_values Sheet =====
        ws_attributes = wb.create_sheet("Attribute_values", 1)
        # Define headers for Attribute_values
        attributes_headers = ['item_id', 'attribute_name', 'original_value']
        # Add headers to Attribute_values sheet
        for col_num, header in enumerate(attributes_headers, 1):
            cell = ws_attributes.cell(row=1, column=col_num)
            cell.value = header
            cell.fill = header_fill
            cell.font = header_font
            cell.alignment = Alignment(horizontal="center", vertical="center")
        # Add sample data to Attribute_values sheet
        sample_attributes = [
            ['3217373735', 'Clothing Neck Style', 'V-Neck Square'],
            ['3217373735', 'Condition', 'New with tags'],
            ['3217373735', 'Material', '100% Cotton'],
            ['3217373735', 'Color', 'Sky Blue'],
            ['3217373735', 'Size', 'Medium'],
            ['1234567890', 'Sleeve Length', 'Sleeveless'],
            ['1234567890', 'Condition', 'Brand New'],
            ['1234567890', 'Pattern', 'Solid'],
            ['1234567890', 'Material', 'Cotton Blend'],
            ['1234567890', 'Color', 'Crimson Red'],
            ['9876543210', 'Material', 'Stainless Steel'],
            ['9876543210', 'Thread Size', 'M6'],
            ['9876543210', 'Length', '2 inches'],
            ['9876543210', 'Coating', 'Zinc Plated'],
            ['9876543210', 'Package Quantity', '50 pieces'],
        ]
        for row_num, row_data in enumerate(sample_attributes, 2):
            for col_num, value in enumerate(row_data, 1):
                ws_attributes.cell(row=row_num, column=col_num, value=value)
        # Adjust column widths for Attribute_values sheet
        ws_attributes.column_dimensions['A'].width = 15  # item_id
        ws_attributes.column_dimensions['B'].width = 25  # attribute_name
        ws_attributes.column_dimensions['C'].width = 30  # original_value
        # Add instructions sheet
        ws_instructions = wb.create_sheet("Instructions", 2)
        instructions_text = [
            ['Excel Upload Instructions', ''],
            ['', ''],
            ['Sheet 1: Products', ''],
            ['- Contains product basic information', ''],
            ['- All columns are required', ''],
            ['- ITEM ID must be unique', ''],
            ['', ''],
            ['Sheet 2: Attribute_values', ''],
            ['- Contains original/manual attribute values', ''],
            ['- item_id must match an ITEM ID from Products sheet', ''],
            ['- Multiple rows can have the same item_id (for different attributes)', ''],
            ['- Each attribute per product should be on a separate row', ''],
            ['', ''],
            ['Upload Process:', ''],
            ['1. Fill in your product data in the Products sheet', ''],
            ['2. Fill in attribute values in the Attribute_values sheet', ''],
            ['3. Ensure item_id values match between both sheets', ''],
            ['4. Save the file and upload via API', ''],
            ['', ''],
            ['Notes:', ''],
            ['- Do not change sheet names (must be "Products" and "Attribute_values")', ''],
            ['- Do not change column header names', ''],
            ['- You can delete the sample data rows', ''],
            ['- You can delete this Instructions sheet before uploading', ''],
        ]
        for row_num, row_data in enumerate(instructions_text, 1):
            ws_instructions.cell(row=row_num, column=1, value=row_data[0])
            if row_num == 1:
                cell = ws_instructions.cell(row=row_num, column=1)
                cell.font = Font(bold=True, size=14)
        ws_instructions.column_dimensions['A'].width = 60
        # Save to BytesIO
        output = io.BytesIO()
        wb.save(output)
        output.seek(0)
        # Create the download response
        response = HttpResponse(
            output.getvalue(),
            content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        )
        response['Content-Disposition'] = 'attachment; filename=product_upload_template.xlsx'
        return response
class DownloadProductsWithAttributesExcelView(APIView):
    """
    GET API to download existing products with their attribute values as Excel.
    Useful for users who want to update existing data.
    """
    def get(self, request):
        # Create workbook
        wb = Workbook()
        if 'Sheet' in wb.sheetnames:
            wb.remove(wb['Sheet'])
        # ===== Products Sheet =====
        ws_products = wb.create_sheet("Products", 0)
        # Headers
        products_headers = [
            'ITEM ID', 'PRODUCT NAME', 'PRODUCT TYPE',
            'Product Short Description', 'Product Long Description', 'image_path'
        ]
        header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
        header_font = Font(bold=True, color="FFFFFF")
        for col_num, header in enumerate(products_headers, 1):
            cell = ws_products.cell(row=1, column=col_num)
            cell.value = header
            cell.fill = header_fill
            cell.font = header_font
            cell.alignment = Alignment(horizontal="center", vertical="center")
        # Fetch and add product data
        products = Product.objects.all()
        for row_num, product in enumerate(products, 2):
            ws_products.cell(row=row_num, column=1, value=product.item_id)
            ws_products.cell(row=row_num, column=2, value=product.product_name)
            ws_products.cell(row=row_num, column=3, value=product.product_type)
            ws_products.cell(row=row_num, column=4, value=product.product_short_description)
            ws_products.cell(row=row_num, column=5, value=product.product_long_description)
            ws_products.cell(row=row_num, column=6, value=product.image_path)
        # Adjust widths
        ws_products.column_dimensions['A'].width = 15
        ws_products.column_dimensions['B'].width = 25
        ws_products.column_dimensions['C'].width = 15
        ws_products.column_dimensions['D'].width = 35
        ws_products.column_dimensions['E'].width = 50
        ws_products.column_dimensions['F'].width = 45
        # ===== Attribute_values Sheet =====
        ws_attributes = wb.create_sheet("Attribute_values", 1)
        attributes_headers = ['item_id', 'attribute_name', 'original_value']
        for col_num, header in enumerate(attributes_headers, 1):
            cell = ws_attributes.cell(row=1, column=col_num)
            cell.value = header
            cell.fill = header_fill
            cell.font = header_font
            cell.alignment = Alignment(horizontal="center", vertical="center")
        # Fetch and add attribute values
        attributes = ProductAttributeValue.objects.select_related('product').all()
        for row_num, attr in enumerate(attributes, 2):
            ws_attributes.cell(row=row_num, column=1, value=attr.product.item_id)
            ws_attributes.cell(row=row_num, column=2, value=attr.attribute_name)
            ws_attributes.cell(row=row_num, column=3, value=attr.original_value)
        ws_attributes.column_dimensions['A'].width = 15
        ws_attributes.column_dimensions['B'].width = 25
        ws_attributes.column_dimensions['C'].width = 30
        # Save to BytesIO
        output = io.BytesIO()
        wb.save(output)
        output.seek(0)
        response = HttpResponse(
            output.getvalue(),
            content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        )
        response['Content-Disposition'] = 'attachment; filename=products_export.xlsx'
        return response
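
# A minimal urls.py wiring sketch for the views above; the route paths are
# assumptions, not part of this file:
#
#   from django.urls import path
#   from . import views
#
#   urlpatterns = [
#       path('products/upload-excel/', views.ProductUploadExcelView.as_view()),
#       path('products/template/', views.DownloadExcelTemplateView.as_view()),
#       path('products/export/', views.DownloadProductsWithAttributesExcelView.as_view()),
#   ]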
class ProductAttributesUploadView(APIView):
    """
    POST API to upload an Excel file and add mandatory/additional attributes
    for product types, together with their possible values.
    """
    parser_classes = (MultiPartParser, FormParser)

    def post(self, request):
        file_obj = request.FILES.get('file')
        if not file_obj:
            return Response({"error": "No file provided."}, status=status.HTTP_400_BAD_REQUEST)
        try:
            df = pd.read_excel(file_obj)
            required_columns = {'product_type', 'attribute_name', 'is_mandatory', 'possible_values'}
            if not required_columns.issubset(df.columns):
                return Response({
                    "error": f"Missing required columns. Found: {list(df.columns)}"
                }, status=status.HTTP_400_BAD_REQUEST)
            for _, row in df.iterrows():
                product_type_name = str(row['product_type']).strip()
                attr_name = str(row['attribute_name']).strip()
                is_mandatory = str(row['is_mandatory']).strip().lower() in ['yes', 'true', '1']
                possible_values = str(row.get('possible_values', '')).strip()
                # Get or create product type
                product_type, _ = ProductType.objects.get_or_create(name=product_type_name)
                # Get or create attribute, then keep is_mandatory in sync with the sheet
                attribute, _ = ProductAttribute.objects.get_or_create(
                    product_type=product_type,
                    name=attr_name,
                    defaults={'is_mandatory': is_mandatory}
                )
                attribute.is_mandatory = is_mandatory
                attribute.save()
                # Replace the possible values with the ones from the sheet
                AttributePossibleValue.objects.filter(attribute=attribute).delete()
                if possible_values:
                    for val in [v.strip() for v in possible_values.split(',') if v.strip()]:
                        AttributePossibleValue.objects.create(attribute=attribute, value=val)
            return Response({"message": "Attributes uploaded successfully."}, status=status.HTTP_201_CREATED)
        except Exception as e:
            return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
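
# Illustrative layout of the attributes sheet consumed by the view above
# (one attribute per row; possible_values is a comma-separated list):
#
#   product_type    | attribute_name | is_mandatory | possible_values
#   Hardware Screws | Material       | Yes          | Steel, Zinc Plated, Stainless Steel
#   Clothing        | Color          | No           | Blue, Red, Green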
class ProductTypeAttributesView(APIView):
    """
    API to view, create, update, and delete product type attributes and their
    possible values. Also supports dynamic product type creation.
    """
    def get(self, request):
        """
        Retrieve all product types with their attributes and possible values.
        """
        product_types = ProductType.objects.all()
        serializer = ProductTypeSerializer(product_types, many=True)
        # Flatten the serialized data into one row per attribute
        result = []
        for pt in serializer.data:
            for attr in pt['attributes']:
                result.append({
                    'product_type': pt['name'],
                    'attribute_name': attr['name'],
                    'is_mandatory': 'Yes' if attr['is_mandatory'] else 'No',
                    'possible_values': ', '.join([pv['value'] for pv in attr['possible_values']])
                })
        return Response(result, status=status.HTTP_200_OK)

    def post(self, request):
        """
        Create a new product type or attribute with possible values.
        Expected payload example:
        {
            "product_type": "Hardware Screws",
            "attribute_name": "Material",
            "is_mandatory": "Yes",
            "possible_values": "Steel, Zinc Plated, Stainless Steel"
        }
        """
        try:
            product_type_name = request.data.get('product_type')
            attribute_name = request.data.get('attribute_name', '')
            is_mandatory = request.data.get('is_mandatory', '').lower() in ['yes', 'true', '1']
            possible_values = request.data.get('possible_values', '')
            if not product_type_name:
                return Response({
                    "error": "product_type is required"
                }, status=status.HTTP_400_BAD_REQUEST)
            with transaction.atomic():
                # Get or create product type
                product_type, created = ProductType.objects.get_or_create(name=product_type_name)
                if created and not attribute_name:
                    return Response({
                        "message": f"Product type '{product_type_name}' created successfully",
                        "data": {"product_type": product_type_name}
                    }, status=status.HTTP_201_CREATED)
                if attribute_name:
                    # Create attribute
                    attribute, attr_created = ProductAttribute.objects.get_or_create(
                        product_type=product_type,
                        name=attribute_name,
                        defaults={'is_mandatory': is_mandatory}
                    )
                    if not attr_created:
                        return Response({
                            "error": f"Attribute '{attribute_name}' already exists for product type '{product_type_name}'"
                        }, status=status.HTTP_400_BAD_REQUEST)
                    # Handle possible values
                    if possible_values:
                        for val in [v.strip() for v in possible_values.split(',') if v.strip()]:
                            AttributePossibleValue.objects.create(attribute=attribute, value=val)
                    return Response({
                        "message": "Attribute created successfully",
                        "data": {
                            "product_type": product_type_name,
                            "attribute_name": attribute_name,
                            "is_mandatory": "Yes" if is_mandatory else "No",
                            "possible_values": possible_values
                        }
                    }, status=status.HTTP_201_CREATED)
                return Response({
                    "message": f"Product type '{product_type_name}' already exists",
                    "data": {"product_type": product_type_name}
                }, status=status.HTTP_200_OK)
        except Exception as e:
            return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    def put(self, request):
        """
        Update an existing product type attribute and its possible values.
        Expected payload example:
        {
            "product_type": "Hardware Screws",
            "attribute_name": "Material",
            "is_mandatory": "Yes",
            "possible_values": "Steel, Zinc Plated, Stainless Steel, Brass"
        }
        """
        try:
            product_type_name = request.data.get('product_type')
            attribute_name = request.data.get('attribute_name')
            is_mandatory = request.data.get('is_mandatory', '').lower() in ['yes', 'true', '1']
            possible_values = request.data.get('possible_values', '')
            if not all([product_type_name, attribute_name]):
                return Response({
                    "error": "product_type and attribute_name are required"
                }, status=status.HTTP_400_BAD_REQUEST)
            with transaction.atomic():
                try:
                    product_type = ProductType.objects.get(name=product_type_name)
                    attribute = ProductAttribute.objects.get(
                        product_type=product_type,
                        name=attribute_name
                    )
                except ProductType.DoesNotExist:
                    return Response({
                        "error": f"Product type '{product_type_name}' not found"
                    }, status=status.HTTP_404_NOT_FOUND)
                except ProductAttribute.DoesNotExist:
                    return Response({
                        "error": f"Attribute '{attribute_name}' not found for product type '{product_type_name}'"
                    }, status=status.HTTP_404_NOT_FOUND)
                # Update attribute
                attribute.is_mandatory = is_mandatory
                attribute.save()
                # Update possible values
                AttributePossibleValue.objects.filter(attribute=attribute).delete()
                if possible_values:
                    for val in [v.strip() for v in possible_values.split(',') if v.strip()]:
                        AttributePossibleValue.objects.create(attribute=attribute, value=val)
                return Response({
                    "message": "Attribute updated successfully",
                    "data": {
                        "product_type": product_type_name,
                        "attribute_name": attribute_name,
                        "is_mandatory": "Yes" if is_mandatory else "No",
                        "possible_values": possible_values
                    }
                }, status=status.HTTP_200_OK)
        except Exception as e:
            return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

    def delete(self, request):
        """
        Delete a product type or a specific attribute.
        Expected payload example:
        {
            "product_type": "Hardware Screws",
            "attribute_name": "Material"
        }
        """
        try:
            product_type_name = request.data.get('product_type')
            attribute_name = request.data.get('attribute_name', '')
            if not product_type_name:
                return Response({
                    "error": "product_type is required"
                }, status=status.HTTP_400_BAD_REQUEST)
            with transaction.atomic():
                try:
                    product_type = ProductType.objects.get(name=product_type_name)
                except ProductType.DoesNotExist:
                    return Response({
                        "error": f"Product type '{product_type_name}' not found"
                    }, status=status.HTTP_404_NOT_FOUND)
                if attribute_name:
                    # Delete a specific attribute
                    try:
                        attribute = ProductAttribute.objects.get(
                            product_type=product_type,
                            name=attribute_name
                        )
                        attribute.delete()
                        return Response({
                            "message": f"Attribute '{attribute_name}' deleted successfully from product type '{product_type_name}'"
                        }, status=status.HTTP_200_OK)
                    except ProductAttribute.DoesNotExist:
                        return Response({
                            "error": f"Attribute '{attribute_name}' not found for product type '{product_type_name}'"
                        }, status=status.HTTP_404_NOT_FOUND)
                else:
                    # Delete the entire product type
                    product_type.delete()
                    return Response({
                        "message": f"Product type '{product_type_name}' and all its attributes deleted successfully"
                    }, status=status.HTTP_200_OK)
        except Exception as e:
            return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
class ProductTypeListView(APIView):
    """
    GET API to list all product types (names only).
    """
    def get(self, request):
        product_types = ProductType.objects.values_list('name', flat=True)
        return Response({"product_types": list(product_types)}, status=status.HTTP_200_OK)
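
# Illustrative response, assuming the template's sample data has been uploaded:
#   {"product_types": ["Clothing", "Hardware"]}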
# --- Manual attribute value management views ---
from .serializers import (
    ProductAttributeValueSerializer,
    ProductAttributeValueInputSerializer,
    BulkProductAttributeValueSerializer,
    ProductWithAttributesSerializer
)
class ProductAttributeValueView(APIView):
    """
    API to manage manually entered original attribute values.
    GET: Retrieve all attribute values for a product
    POST: Create or update attribute values for a product
    DELETE: Delete attribute values
    """

    def get(self, request):
        """
        Get original attribute values for a specific product or all products.
        Query params: item_id (optional)
        """
        item_id = request.query_params.get('item_id')

        if item_id:
            try:
                product = Product.objects.get(item_id=item_id)
                values = ProductAttributeValue.objects.filter(product=product)
                serializer = ProductAttributeValueSerializer(values, many=True)
                return Response({
                    "item_id": item_id,
                    "attributes": serializer.data
                }, status=status.HTTP_200_OK)
            except Product.DoesNotExist:
                return Response({
                    "error": f"Product with item_id '{item_id}' not found"
                }, status=status.HTTP_404_NOT_FOUND)
        else:
            # Return attribute values for all products as a flat list; each
            # entry carries its product reference via the serializer
            values = ProductAttributeValue.objects.all().select_related('product')
            serializer = ProductAttributeValueSerializer(values, many=True)
            return Response(serializer.data, status=status.HTTP_200_OK)

    def post(self, request):
        """
        Create or update an original attribute value for a product.
        Expected payload:
        {
            "item_id": "3217373735",
            "attribute_name": "Clothing Neck Style",
            "original_value": "V-Neck Square"
        }
        """
        serializer = ProductAttributeValueInputSerializer(data=request.data)
        if not serializer.is_valid():
            return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)

        validated_data = serializer.validated_data
        item_id = validated_data['item_id']
        attribute_name = validated_data['attribute_name']
        original_value = validated_data['original_value']

        try:
            product = Product.objects.get(item_id=item_id)
        except Product.DoesNotExist:
            return Response({
                "error": f"Product with item_id '{item_id}' not found"
            }, status=status.HTTP_404_NOT_FOUND)

        # Create or update the attribute value (an upsert keyed on
        # product + attribute_name)
        attr_value, created = ProductAttributeValue.objects.update_or_create(
            product=product,
            attribute_name=attribute_name,
            defaults={'original_value': original_value}
        )

        response_serializer = ProductAttributeValueSerializer(attr_value)
        return Response({
            "message": "Attribute value created" if created else "Attribute value updated",
            "data": response_serializer.data
        }, status=status.HTTP_201_CREATED if created else status.HTTP_200_OK)

    def delete(self, request):
        """
        Delete original attribute value(s).
        Expected payload:
        {
            "item_id": "3217373735",
            "attribute_name": "Clothing Neck Style"  # Optional; if omitted, deletes all values for the product
        }
        """
        item_id = request.data.get('item_id')
        attribute_name = request.data.get('attribute_name')

        if not item_id:
            return Response({
                "error": "item_id is required"
            }, status=status.HTTP_400_BAD_REQUEST)

        try:
            product = Product.objects.get(item_id=item_id)
        except Product.DoesNotExist:
            return Response({
                "error": f"Product with item_id '{item_id}' not found"
            }, status=status.HTTP_404_NOT_FOUND)

        if attribute_name:
            # Delete a specific attribute value
            deleted_count, _ = ProductAttributeValue.objects.filter(
                product=product,
                attribute_name=attribute_name
            ).delete()
            if deleted_count == 0:
                return Response({
                    "error": f"Attribute '{attribute_name}' not found for product '{item_id}'"
                }, status=status.HTTP_404_NOT_FOUND)
            return Response({
                "message": f"Attribute '{attribute_name}' deleted successfully"
            }, status=status.HTTP_200_OK)
        else:
            # Delete all attribute values for the product
            deleted_count, _ = ProductAttributeValue.objects.filter(product=product).delete()
            return Response({
                "message": f"Deleted {deleted_count} attribute(s) for product '{item_id}'"
            }, status=status.HTTP_200_OK)
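
# Usage sketch for ProductAttributeValueView -- payloads mirror the docstrings
# above; the /api/attribute-values/ path is hypothetical:
#
#     from rest_framework.test import APIClient
#     client = APIClient()
#     client.post("/api/attribute-values/", {
#         "item_id": "3217373735",
#         "attribute_name": "Clothing Neck Style",
#         "original_value": "V-Neck Square",
#     }, format="json")
#     client.get("/api/attribute-values/", {"item_id": "3217373735"})
#     client.delete("/api/attribute-values/", {"item_id": "3217373735"}, format="json")
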
class BulkProductAttributeValueView(APIView):
    """
    API for bulk operations on original attribute values.
    POST: Create/update multiple attribute values at once
    """

    def post(self, request):
        """
        Bulk create or update attribute values for multiple products.
        Expected payload:
        {
            "products": [
                {
                    "item_id": "3217373735",
                    "attributes": {
                        "Clothing Neck Style": "V-Neck Square",
                        "Condition": "New with tags"
                    }
                },
                {
                    "item_id": "1234567890",
                    "attributes": {
                        "Material": "Cotton",
                        "Size": "L"
                    }
                }
            ]
        }
        """
        products_data = request.data.get('products', [])
        if not products_data:
            return Response({
                "error": "products list is required"
            }, status=status.HTTP_400_BAD_REQUEST)

        results = []
        successful = 0
        failed = 0

        # All writes commit together; per-product failures are recorded in
        # `results` but do not abort the batch.
        with transaction.atomic():
            for product_data in products_data:
                serializer = BulkProductAttributeValueSerializer(data=product_data)
                if not serializer.is_valid():
                    failed += 1
                    results.append({
                        "item_id": product_data.get('item_id'),
                        "status": "failed",
                        "error": serializer.errors
                    })
                    continue

                validated_data = serializer.validated_data
                item_id = validated_data['item_id']
                attributes = validated_data['attributes']

                try:
                    product = Product.objects.get(item_id=item_id)
                    created_count = 0
                    updated_count = 0
                    for attr_name, original_value in attributes.items():
                        _, created = ProductAttributeValue.objects.update_or_create(
                            product=product,
                            attribute_name=attr_name,
                            defaults={'original_value': original_value}
                        )
                        if created:
                            created_count += 1
                        else:
                            updated_count += 1
                    successful += 1
                    results.append({
                        "item_id": item_id,
                        "status": "success",
                        "created": created_count,
                        "updated": updated_count
                    })
                except Product.DoesNotExist:
                    failed += 1
                    results.append({
                        "item_id": item_id,
                        "status": "failed",
                        "error": f"Product with item_id '{item_id}' not found"
                    })

        return Response({
            "results": results,
            "total_products": len(products_data),
            "successful": successful,
            "failed": failed
        }, status=status.HTTP_200_OK)
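
# Usage sketch for BulkProductAttributeValueView (path hypothetical). Note the
# response status is always 200; per-product outcomes are in "results":
#
#     client.post("/api/attribute-values/bulk/", {
#         "products": [
#             {"item_id": "3217373735",
#              "attributes": {"Clothing Neck Style": "V-Neck Square"}},
#         ],
#     }, format="json")
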
class ProductListWithAttributesView(APIView):
    """
    GET API to list all products with their original attribute values.
    """

    def get(self, request):
        item_id = request.query_params.get('item_id')

        if item_id:
            try:
                product = Product.objects.get(item_id=item_id)
                serializer = ProductWithAttributesSerializer(product)
                return Response(serializer.data, status=status.HTTP_200_OK)
            except Product.DoesNotExist:
                return Response({
                    "error": f"Product with item_id '{item_id}' not found"
                }, status=status.HTTP_404_NOT_FOUND)
        else:
            products = Product.objects.all()
            serializer = ProductWithAttributesSerializer(products, many=True)
            return Response(serializer.data, status=status.HTTP_200_OK)
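
# Usage sketch for ProductListWithAttributesView (path hypothetical):
#
#     client.get("/api/products-with-attributes/")  # all products
#     client.get("/api/products-with-attributes/", {"item_id": "3217373735"})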