views.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. # # #==================== views.py ====================
  2. # # from rest_framework.views import APIView
  3. # # from rest_framework.response import Response
  4. # # from rest_framework import status
  5. # # from .serializers import (
  6. # # ProductAttributeRequestSerializer,
  7. # # ProductAttributeResponseSerializer
  8. # # )
  9. # # from .services import ProductAttributeService
  10. # # class ExtractProductAttributesView(APIView):
  11. # # """
  12. # # API endpoint to extract product attributes using Groq LLM.
  13. # # POST /api/extract-attributes/
  14. # # Request Body:
  15. # # {
  16. # # "title": "Product title (optional)",
  17. # # "short_desc": "Short description (optional)",
  18. # # "long_desc": "Long description (optional)",
  19. # # "mandatory_attrs": {
  20. # # "Attribute1": ["value1", "value2", "value3"],
  21. # # "Attribute2": ["valueA", "valueB"]
  22. # # },
  23. # # "model": "llama-3.1-8b-instant (optional)",
  24. # # "extract_additional": true (optional, default: true)
  25. # # }
  26. # # Response:
  27. # # {
  28. # # "mandatory": {
  29. # # "Attribute1": "value1",
  30. # # "Attribute2": "valueA"
  31. # # },
  32. # # "additional": {
  33. # # "Color": "Blue",
  34. # # "Brand": "Example"
  35. # # }
  36. # # }
  37. # # """
  38. # # def post(self, request):
  39. # # # Validate request data
  40. # # serializer = ProductAttributeRequestSerializer(data=request.data)
  41. # # if not serializer.is_valid():
  42. # # return Response(
  43. # # {"error": serializer.errors},
  44. # # status=status.HTTP_400_BAD_REQUEST
  45. # # )
  46. # # validated_data = serializer.validated_data
  47. # # # Combine product text
  48. # # product_text = ProductAttributeService.combine_product_text(
  49. # # title=validated_data.get('title'),
  50. # # short_desc=validated_data.get('short_desc'),
  51. # # long_desc=validated_data.get('long_desc')
  52. # # )
  53. # # # Extract attributes
  54. # # result = ProductAttributeService.extract_attributes(
  55. # # product_text=product_text,
  56. # # mandatory_attrs=validated_data['mandatory_attrs'],
  57. # # model=validated_data.get('model'),
  58. # # extract_additional=validated_data.get('extract_additional', True)
  59. # # )
  60. # # # Return response
  61. # # response_serializer = ProductAttributeResponseSerializer(data=result)
  62. # # if response_serializer.is_valid():
  63. # # return Response(response_serializer.data, status=status.HTTP_200_OK)
  64. # # return Response(result, status=status.HTTP_200_OK)
  65. # from rest_framework.views import APIView
  66. # from rest_framework.response import Response
  67. # from rest_framework import status
  68. # from .serializers import (
  69. # SingleProductRequestSerializer,
  70. # BatchProductRequestSerializer,
  71. # ProductAttributeResultSerializer,
  72. # BatchProductResponseSerializer
  73. # )
  74. # from .services import ProductAttributeService
  75. # class ExtractProductAttributesView(APIView):
  76. # """
  77. # API endpoint to extract product attributes for a single product.
  78. # POST /api/extract-attributes/
  79. # Request Body:
  80. # {
  81. # "title": "Product title (optional)",
  82. # "short_desc": "Short description (optional)",
  83. # "long_desc": "Long description (optional)",
  84. # "mandatory_attrs": {
  85. # "Attribute1": ["value1", "value2", "value3"],
  86. # "Attribute2": ["valueA", "valueB"]
  87. # },
  88. # "model": "llama-3.1-8b-instant (optional)",
  89. # "extract_additional": true (optional, default: true)
  90. # }
  91. # """
  92. # def post(self, request):
  93. # serializer = SingleProductRequestSerializer(data=request.data)
  94. # if not serializer.is_valid():
  95. # return Response(
  96. # {"error": serializer.errors},
  97. # status=status.HTTP_400_BAD_REQUEST
  98. # )
  99. # validated_data = serializer.validated_data
  100. # product_text = ProductAttributeService.combine_product_text(
  101. # title=validated_data.get('title'),
  102. # short_desc=validated_data.get('short_desc'),
  103. # long_desc=validated_data.get('long_desc')
  104. # )
  105. # result = ProductAttributeService.extract_attributes(
  106. # product_text=product_text,
  107. # mandatory_attrs=validated_data['mandatory_attrs'],
  108. # model=validated_data.get('model'),
  109. # extract_additional=validated_data.get('extract_additional', True)
  110. # )
  111. # response_serializer = ProductAttributeResultSerializer(data=result)
  112. # if response_serializer.is_valid():
  113. # return Response(response_serializer.data, status=status.HTTP_200_OK)
  114. # return Response(result, status=status.HTTP_200_OK)
  115. # class BatchExtractProductAttributesView(APIView):
  116. # """
  117. # API endpoint to extract product attributes for multiple products in batch.
  118. # POST /api/batch-extract-attributes/
  119. # Request Body:
  120. # {
  121. # "products": [
  122. # {
  123. # "product_id": "prod_001",
  124. # "title": "Product 1 title",
  125. # "short_desc": "Short description",
  126. # "long_desc": "Long description"
  127. # },
  128. # {
  129. # "product_id": "prod_002",
  130. # "title": "Product 2 title",
  131. # "short_desc": "Short description"
  132. # }
  133. # ],
  134. # "mandatory_attrs": {
  135. # "Attribute1": ["value1", "value2", "value3"],
  136. # "Attribute2": ["valueA", "valueB"]
  137. # },
  138. # "model": "llama-3.1-8b-instant (optional)",
  139. # "extract_additional": true (optional, default: true)
  140. # }
  141. # Response:
  142. # {
  143. # "results": [
  144. # {
  145. # "product_id": "prod_001",
  146. # "mandatory": {...},
  147. # "additional": {...}
  148. # },
  149. # {
  150. # "product_id": "prod_002",
  151. # "mandatory": {...},
  152. # "additional": {...}
  153. # }
  154. # ],
  155. # "total_products": 2,
  156. # "successful": 2,
  157. # "failed": 0
  158. # }
  159. # """
  160. # def post(self, request):
  161. # serializer = BatchProductRequestSerializer(data=request.data)
  162. # if not serializer.is_valid():
  163. # return Response(
  164. # {"error": serializer.errors},
  165. # status=status.HTTP_400_BAD_REQUEST
  166. # )
  167. # validated_data = serializer.validated_data
  168. # # Extract attributes for all products in batch
  169. # result = ProductAttributeService.extract_attributes_batch(
  170. # products=validated_data['products'],
  171. # mandatory_attrs=validated_data['mandatory_attrs'],
  172. # model=validated_data.get('model'),
  173. # extract_additional=validated_data.get('extract_additional', True)
  174. # )
  175. # response_serializer = BatchProductResponseSerializer(data=result)
  176. # if response_serializer.is_valid():
  177. # return Response(response_serializer.data, status=status.HTTP_200_OK)
  178. # return Response(result, status=status.HTTP_200_OK)
  179. # ==================== views.py ====================
  180. from rest_framework.views import APIView
  181. from rest_framework.response import Response
  182. from rest_framework import status
  183. from .serializers import (
  184. SingleProductRequestSerializer,
  185. BatchProductRequestSerializer,
  186. ProductAttributeResultSerializer,
  187. BatchProductResponseSerializer
  188. )
  189. from .services import ProductAttributeService
  190. from .ocr_service import OCRService
  191. class ExtractProductAttributesView(APIView):
  192. """
  193. API endpoint to extract product attributes for a single product.
  194. Now supports image URL for OCR-based text extraction.
  195. """
  196. def post(self, request):
  197. serializer = SingleProductRequestSerializer(data=request.data)
  198. if not serializer.is_valid():
  199. return Response(
  200. {"error": serializer.errors},
  201. status=status.HTTP_400_BAD_REQUEST
  202. )
  203. validated_data = serializer.validated_data
  204. # Process image if URL provided
  205. ocr_results = None
  206. ocr_text = None
  207. if validated_data.get('process_image', True) and validated_data.get('image_url'):
  208. ocr_service = OCRService()
  209. ocr_results = ocr_service.process_image(validated_data['image_url'])
  210. # Extract attributes from OCR
  211. if ocr_results and ocr_results.get('detected_text'):
  212. ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
  213. ocr_results,
  214. validated_data.get('model')
  215. )
  216. ocr_results['extracted_attributes'] = ocr_attrs
  217. # Format OCR text
  218. ocr_text = "\n".join([
  219. f"{item['text']} (confidence: {item['confidence']:.2f})"
  220. for item in ocr_results['detected_text']
  221. ])
  222. # Combine all product information
  223. product_text = ProductAttributeService.combine_product_text(
  224. title=validated_data.get('title'),
  225. short_desc=validated_data.get('short_desc'),
  226. long_desc=validated_data.get('long_desc'),
  227. ocr_text=ocr_text
  228. )
  229. # Extract attributes
  230. result = ProductAttributeService.extract_attributes(
  231. product_text=product_text,
  232. mandatory_attrs=validated_data['mandatory_attrs'],
  233. model=validated_data.get('model'),
  234. extract_additional=validated_data.get('extract_additional', True)
  235. )
  236. # Add OCR results if available
  237. if ocr_results:
  238. result['ocr_results'] = ocr_results
  239. response_serializer = ProductAttributeResultSerializer(data=result)
  240. if response_serializer.is_valid():
  241. return Response(response_serializer.data, status=status.HTTP_200_OK)
  242. return Response(result, status=status.HTTP_200_OK)
  243. class BatchExtractProductAttributesView(APIView):
  244. """
  245. API endpoint to extract product attributes for multiple products in batch.
  246. Now supports image URLs for OCR-based text extraction.
  247. """
  248. def post(self, request):
  249. serializer = BatchProductRequestSerializer(data=request.data)
  250. if not serializer.is_valid():
  251. return Response(
  252. {"error": serializer.errors},
  253. status=status.HTTP_400_BAD_REQUEST
  254. )
  255. validated_data = serializer.validated_data
  256. # Extract attributes for all products in batch
  257. result = ProductAttributeService.extract_attributes_batch(
  258. products=validated_data['products'],
  259. mandatory_attrs=validated_data['mandatory_attrs'],
  260. model=validated_data.get('model'),
  261. extract_additional=validated_data.get('extract_additional', True),
  262. process_image=validated_data.get('process_image', True)
  263. )
  264. response_serializer = BatchProductResponseSerializer(data=result)
  265. if response_serializer.is_valid():
  266. return Response(response_serializer.data, status=status.HTTP_200_OK)
  267. return Response(result, status=status.HTTP_200_OK)
  268. from rest_framework.views import APIView
  269. from rest_framework.response import Response
  270. from rest_framework import status
  271. from .models import Product
  272. from .serializers import ProductSerializer
  273. class ProductListView(APIView):
  274. """
  275. GET API to list all products with details
  276. """
  277. def get(self, request):
  278. products = Product.objects.all()
  279. serializer = ProductSerializer(products, many=True)
  280. return Response(serializer.data, status=status.HTTP_200_OK)
  281. import pandas as pd
  282. from rest_framework.parsers import MultiPartParser, FormParser
  283. from rest_framework.views import APIView
  284. from rest_framework.response import Response
  285. from rest_framework import status
  286. from .models import Product
  287. from .serializers import ProductSerializer
  288. # class ProductUploadExcelView(APIView):
  289. # """
  290. # POST API to upload an Excel file and add data to Product model
  291. # """
  292. # parser_classes = (MultiPartParser, FormParser)
  293. # def post(self, request, *args, **kwargs):
  294. # file_obj = request.FILES.get('file')
  295. # if not file_obj:
  296. # return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)
  297. # try:
  298. # # Read the Excel file
  299. # df = pd.read_excel(file_obj)
  300. # # Normalize column names
  301. # df.columns = [c.strip().lower().replace(' ', '_') for c in df.columns]
  302. # # Expected columns
  303. # expected_cols = {
  304. # 'item_id',
  305. # 'product_name',
  306. # 'product_long_description',
  307. # 'product_short_description',
  308. # 'product_type',
  309. # 'image_path'
  310. # }
  311. # if not expected_cols.issubset(df.columns):
  312. # return Response({
  313. # 'error': 'Missing required columns',
  314. # 'required_columns': list(expected_cols)
  315. # }, status=status.HTTP_400_BAD_REQUEST)
  316. # # Loop through rows and create Product entries
  317. # created_count = 0
  318. # for _, row in df.iterrows():
  319. # Product.objects.create(
  320. # item_id=row.get('item_id', ''),
  321. # product_name=row.get('product_name', ''),
  322. # product_long_description=row.get('product_long_description', ''),
  323. # product_short_description=row.get('product_short_description', ''),
  324. # product_type=row.get('product_type', ''),
  325. # image_path=row.get('image_path', ''),
  326. # )
  327. # created_count += 1
  328. # return Response({
  329. # 'message': f'Successfully uploaded {created_count} products.'
  330. # }, status=status.HTTP_201_CREATED)
  331. # except Exception as e:
  332. # return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
  333. class ProductUploadExcelView(APIView):
  334. """
  335. POST API to upload an Excel file and add data to Product model (skip duplicates)
  336. """
  337. parser_classes = (MultiPartParser, FormParser)
  338. def post(self, request, *args, **kwargs):
  339. file_obj = request.FILES.get('file')
  340. if not file_obj:
  341. return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)
  342. try:
  343. import pandas as pd
  344. df = pd.read_excel(file_obj)
  345. df.columns = [c.strip().lower().replace(' ', '_') for c in df.columns]
  346. expected_cols = {
  347. 'item_id',
  348. 'product_name',
  349. 'product_long_description',
  350. 'product_short_description',
  351. 'product_type',
  352. 'image_path'
  353. }
  354. if not expected_cols.issubset(df.columns):
  355. return Response({
  356. 'error': 'Missing required columns',
  357. 'required_columns': list(expected_cols)
  358. }, status=status.HTTP_400_BAD_REQUEST)
  359. created_count = 0
  360. skipped_count = 0
  361. for _, row in df.iterrows():
  362. item_id = row.get('item_id', '')
  363. # Check if this item already exists
  364. if Product.objects.filter(item_id=item_id).exists():
  365. skipped_count += 1
  366. continue
  367. Product.objects.create(
  368. item_id=item_id,
  369. product_name=row.get('product_name', ''),
  370. product_long_description=row.get('product_long_description', ''),
  371. product_short_description=row.get('product_short_description', ''),
  372. product_type=row.get('product_type', ''),
  373. image_path=row.get('image_path', ''),
  374. )
  375. created_count += 1
  376. return Response({
  377. 'message': f'Successfully uploaded {created_count} products.',
  378. 'skipped': f'Skipped {skipped_count} duplicates.'
  379. }, status=status.HTTP_201_CREATED)
  380. except Exception as e:
  381. return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)