| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664 |
- # # #==================== views.py ====================
- # # from rest_framework.views import APIView
- # # from rest_framework.response import Response
- # # from rest_framework import status
- # # from .serializers import (
- # # ProductAttributeRequestSerializer,
- # # ProductAttributeResponseSerializer
- # # )
- # # from .services import ProductAttributeService
- # # class ExtractProductAttributesView(APIView):
- # # """
- # # API endpoint to extract product attributes using Groq LLM.
-
- # # POST /api/extract-attributes/
-
- # # Request Body:
- # # {
- # # "title": "Product title (optional)",
- # # "short_desc": "Short description (optional)",
- # # "long_desc": "Long description (optional)",
- # # "mandatory_attrs": {
- # # "Attribute1": ["value1", "value2", "value3"],
- # # "Attribute2": ["valueA", "valueB"]
- # # },
- # # "model": "llama-3.1-8b-instant (optional)",
- # # "extract_additional": true (optional, default: true)
- # # }
-
- # # Response:
- # # {
- # # "mandatory": {
- # # "Attribute1": "value1",
- # # "Attribute2": "valueA"
- # # },
- # # "additional": {
- # # "Color": "Blue",
- # # "Brand": "Example"
- # # }
- # # }
- # # """
- # # def post(self, request):
- # # # Validate request data
- # # serializer = ProductAttributeRequestSerializer(data=request.data)
- # # if not serializer.is_valid():
- # # return Response(
- # # {"error": serializer.errors},
- # # status=status.HTTP_400_BAD_REQUEST
- # # )
- # # validated_data = serializer.validated_data
- # # # Combine product text
- # # product_text = ProductAttributeService.combine_product_text(
- # # title=validated_data.get('title'),
- # # short_desc=validated_data.get('short_desc'),
- # # long_desc=validated_data.get('long_desc')
- # # )
- # # # Extract attributes
- # # result = ProductAttributeService.extract_attributes(
- # # product_text=product_text,
- # # mandatory_attrs=validated_data['mandatory_attrs'],
- # # model=validated_data.get('model'),
- # # extract_additional=validated_data.get('extract_additional', True)
- # # )
- # # # Return response
- # # response_serializer = ProductAttributeResponseSerializer(data=result)
- # # if response_serializer.is_valid():
- # # return Response(response_serializer.data, status=status.HTTP_200_OK)
-
- # # return Response(result, status=status.HTTP_200_OK)
- # from rest_framework.views import APIView
- # from rest_framework.response import Response
- # from rest_framework import status
- # from .serializers import (
- # SingleProductRequestSerializer,
- # BatchProductRequestSerializer,
- # ProductAttributeResultSerializer,
- # BatchProductResponseSerializer
- # )
- # from .services import ProductAttributeService
- # class ExtractProductAttributesView(APIView):
- # """
- # API endpoint to extract product attributes for a single product.
-
- # POST /api/extract-attributes/
-
- # Request Body:
- # {
- # "title": "Product title (optional)",
- # "short_desc": "Short description (optional)",
- # "long_desc": "Long description (optional)",
- # "mandatory_attrs": {
- # "Attribute1": ["value1", "value2", "value3"],
- # "Attribute2": ["valueA", "valueB"]
- # },
- # "model": "llama-3.1-8b-instant (optional)",
- # "extract_additional": true (optional, default: true)
- # }
- # """
- # def post(self, request):
- # serializer = SingleProductRequestSerializer(data=request.data)
- # if not serializer.is_valid():
- # return Response(
- # {"error": serializer.errors},
- # status=status.HTTP_400_BAD_REQUEST
- # )
- # validated_data = serializer.validated_data
- # product_text = ProductAttributeService.combine_product_text(
- # title=validated_data.get('title'),
- # short_desc=validated_data.get('short_desc'),
- # long_desc=validated_data.get('long_desc')
- # )
- # result = ProductAttributeService.extract_attributes(
- # product_text=product_text,
- # mandatory_attrs=validated_data['mandatory_attrs'],
- # model=validated_data.get('model'),
- # extract_additional=validated_data.get('extract_additional', True)
- # )
- # response_serializer = ProductAttributeResultSerializer(data=result)
- # if response_serializer.is_valid():
- # return Response(response_serializer.data, status=status.HTTP_200_OK)
-
- # return Response(result, status=status.HTTP_200_OK)
- # class BatchExtractProductAttributesView(APIView):
- # """
- # API endpoint to extract product attributes for multiple products in batch.
-
- # POST /api/batch-extract-attributes/
-
- # Request Body:
- # {
- # "products": [
- # {
- # "product_id": "prod_001",
- # "title": "Product 1 title",
- # "short_desc": "Short description",
- # "long_desc": "Long description"
- # },
- # {
- # "product_id": "prod_002",
- # "title": "Product 2 title",
- # "short_desc": "Short description"
- # }
- # ],
- # "mandatory_attrs": {
- # "Attribute1": ["value1", "value2", "value3"],
- # "Attribute2": ["valueA", "valueB"]
- # },
- # "model": "llama-3.1-8b-instant (optional)",
- # "extract_additional": true (optional, default: true)
- # }
-
- # Response:
- # {
- # "results": [
- # {
- # "product_id": "prod_001",
- # "mandatory": {...},
- # "additional": {...}
- # },
- # {
- # "product_id": "prod_002",
- # "mandatory": {...},
- # "additional": {...}
- # }
- # ],
- # "total_products": 2,
- # "successful": 2,
- # "failed": 0
- # }
- # """
- # def post(self, request):
- # serializer = BatchProductRequestSerializer(data=request.data)
- # if not serializer.is_valid():
- # return Response(
- # {"error": serializer.errors},
- # status=status.HTTP_400_BAD_REQUEST
- # )
- # validated_data = serializer.validated_data
- # # Extract attributes for all products in batch
- # result = ProductAttributeService.extract_attributes_batch(
- # products=validated_data['products'],
- # mandatory_attrs=validated_data['mandatory_attrs'],
- # model=validated_data.get('model'),
- # extract_additional=validated_data.get('extract_additional', True)
- # )
- # response_serializer = BatchProductResponseSerializer(data=result)
- # if response_serializer.is_valid():
- # return Response(response_serializer.data, status=status.HTTP_200_OK)
-
- # return Response(result, status=status.HTTP_200_OK)
- # ==================== views.py ====================
- from rest_framework.views import APIView
- from rest_framework.response import Response
- from rest_framework import status
- from .serializers import (
- SingleProductRequestSerializer,
- BatchProductRequestSerializer,
- ProductAttributeResultSerializer,
- BatchProductResponseSerializer
- )
- from .services import ProductAttributeService
- from .ocr_service import OCRService
- # class ExtractProductAttributesView(APIView):
- # """
- # API endpoint to extract product attributes for a single product.
- # Now supports image URL for OCR-based text extraction.
- # """
- # def post(self, request):
- # serializer = SingleProductRequestSerializer(data=request.data)
- # if not serializer.is_valid():
- # return Response(
- # {"error": serializer.errors},
- # status=status.HTTP_400_BAD_REQUEST
- # )
- # validated_data = serializer.validated_data
-
- # # Process image if URL provided
- # ocr_results = None
- # ocr_text = None
-
- # if validated_data.get('process_image', True) and validated_data.get('image_url'):
- # ocr_service = OCRService()
- # ocr_results = ocr_service.process_image(validated_data['image_url'])
-
- # # Extract attributes from OCR
- # if ocr_results and ocr_results.get('detected_text'):
- # ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
- # ocr_results,
- # validated_data.get('model')
- # )
- # ocr_results['extracted_attributes'] = ocr_attrs
-
- # # Format OCR text
- # ocr_text = "\n".join([
- # f"{item['text']} (confidence: {item['confidence']:.2f})"
- # for item in ocr_results['detected_text']
- # ])
- # # Combine all product information
- # product_text = ProductAttributeService.combine_product_text(
- # title=validated_data.get('title'),
- # short_desc=validated_data.get('short_desc'),
- # long_desc=validated_data.get('long_desc'),
- # ocr_text=ocr_text
- # )
- # # Extract attributes
- # result = ProductAttributeService.extract_attributes(
- # product_text=product_text,
- # mandatory_attrs=validated_data['mandatory_attrs'],
- # model=validated_data.get('model'),
- # extract_additional=validated_data.get('extract_additional', True)
- # )
-
- # # Add OCR results if available
- # if ocr_results:
- # result['ocr_results'] = ocr_results
- # response_serializer = ProductAttributeResultSerializer(data=result)
- # if response_serializer.is_valid():
- # return Response(response_serializer.data, status=status.HTTP_200_OK)
-
- # return Response(result, status=status.HTTP_200_OK)
- from .models import Product
class ExtractProductAttributesView(APIView):
    """
    Extract attributes for a single product looked up by ``item_id``.

    The product's name, descriptions and image path are read from the
    ``Product`` model rather than from the request body. When image
    processing is enabled and the product has an image path, OCR output is
    folded into the text handed to the attribute extractor.
    """

    def post(self, request):
        """
        Validate the request, load the product, and return its attributes.

        Responds with 400 when the request fails serializer validation and
        404 when no ``Product`` matches the supplied ``item_id``. Otherwise
        responds with 200 and the extraction result, including an
        ``ocr_results`` entry when OCR produced a truthy result.
        """
        request_serializer = SingleProductRequestSerializer(data=request.data)
        if not request_serializer.is_valid():
            return Response(
                {"error": request_serializer.errors},
                status=status.HTTP_400_BAD_REQUEST,
            )

        params = request_serializer.validated_data
        item_id = params.get("item_id")

        # Look the product up; an unknown id is a client error, not a crash.
        try:
            product = Product.objects.get(item_id=item_id)
        except Product.DoesNotExist:
            return Response(
                {"error": f"Product with item_id '{item_id}' not found."},
                status=status.HTTP_404_NOT_FOUND,
            )

        image_url = product.image_path

        # OCR is optional: it needs both the request flag and an image path.
        ocr_results = None
        ocr_text = None
        if params.get("process_image", True) and image_url:
            ocr_results = OCRService().process_image(image_url)
            if ocr_results and ocr_results.get("detected_text"):
                ocr_results["extracted_attributes"] = (
                    ProductAttributeService.extract_attributes_from_ocr(
                        ocr_results, params.get("model")
                    )
                )
                # One line per detected fragment, annotated with its confidence.
                ocr_text = "\n".join(
                    f"{item['text']} (confidence: {item['confidence']:.2f})"
                    for item in ocr_results["detected_text"]
                )

        # Merge the database text fields with whatever OCR contributed.
        product_text = ProductAttributeService.combine_product_text(
            title=product.product_name,
            short_desc=product.product_short_description,
            long_desc=product.product_long_description,
            ocr_text=ocr_text,
        )

        result = ProductAttributeService.extract_attributes(
            product_text=product_text,
            mandatory_attrs=params["mandatory_attrs"],
            model=params.get("model"),
            extract_additional=params.get("extract_additional", True),
        )

        if ocr_results:
            result["ocr_results"] = ocr_results

        # Prefer the serializer's rendering; fall back to the raw result
        # when it does not validate against the response serializer.
        response_serializer = ProductAttributeResultSerializer(data=result)
        payload = response_serializer.data if response_serializer.is_valid() else result
        return Response(payload, status=status.HTTP_200_OK)
- # class BatchExtractProductAttributesView(APIView):
- # """
- # API endpoint to extract product attributes for multiple products in batch.
- # Now supports image URLs for OCR-based text extraction.
- # """
- # def post(self, request):
- # serializer = BatchProductRequestSerializer(data=request.data)
- # if not serializer.is_valid():
- # return Response(
- # {"error": serializer.errors},
- # status=status.HTTP_400_BAD_REQUEST
- # )
- # validated_data = serializer.validated_data
- # # Extract attributes for all products in batch
- # result = ProductAttributeService.extract_attributes_batch(
- # products=validated_data['products'],
- # mandatory_attrs=validated_data['mandatory_attrs'],
- # model=validated_data.get('model'),
- # extract_additional=validated_data.get('extract_additional', True),
- # process_image=validated_data.get('process_image', True)
- # )
- # response_serializer = BatchProductResponseSerializer(data=result)
- # if response_serializer.is_valid():
- # return Response(response_serializer.data, status=status.HTTP_200_OK)
-
- # return Response(result, status=status.HTTP_200_OK)
- from .models import Product
class BatchExtractProductAttributesView(APIView):
    """
    Extract attributes for several products identified by ``item_ids``.

    Product details are loaded from the ``Product`` model. Each product is
    processed independently: a failure on one product is recorded in its
    result entry and does not abort the rest of the batch.
    """

    @staticmethod
    def _extract_for_product(product, mandatory_attrs, model, extract_additional, process_image):
        """Run the OCR + attribute-extraction pipeline for one product and return its result dict."""
        image_url = product.image_path

        ocr_results = None
        ocr_text = None
        if process_image and image_url:
            ocr_service = OCRService()
            ocr_results = ocr_service.process_image(image_url)
            if ocr_results and ocr_results.get("detected_text"):
                ocr_attrs = ProductAttributeService.extract_attributes_from_ocr(
                    ocr_results, model
                )
                ocr_results["extracted_attributes"] = ocr_attrs
                # One line per detected fragment, annotated with its confidence.
                ocr_text = "\n".join([
                    f"{item['text']} (confidence: {item['confidence']:.2f})"
                    for item in ocr_results["detected_text"]
                ])

        product_text = ProductAttributeService.combine_product_text(
            title=product.product_name,
            short_desc=product.product_short_description,
            long_desc=product.product_long_description,
            ocr_text=ocr_text,
        )
        extracted = ProductAttributeService.extract_attributes(
            product_text=product_text,
            mandatory_attrs=mandatory_attrs,
            model=model,
            extract_additional=extract_additional,
        )

        result = {
            "product_id": product.item_id,
            "mandatory": extracted.get("mandatory", {}),
            "additional": extracted.get("additional", {}),
        }
        if ocr_results:
            result["ocr_results"] = ocr_results
        return result

    def post(self, request):
        """
        Validate the request and return per-product extraction results.

        Responds with 400 when the request fails serializer validation.
        Otherwise responds with 200 and a dict holding ``results`` (one
        entry per processed product in query order, followed by one error
        entry per requested id that was not found), ``total_products``
        (number of requested ids), ``successful`` and ``failed``.
        """
        serializer = BatchProductRequestSerializer(data=request.data)
        if not serializer.is_valid():
            return Response({"error": serializer.errors}, status=status.HTTP_400_BAD_REQUEST)

        validated_data = serializer.validated_data
        item_ids = validated_data.get("item_ids", [])
        model = validated_data.get("model")
        extract_additional = validated_data.get("extract_additional", True)
        process_image = validated_data.get("process_image", True)
        mandatory_attrs = validated_data["mandatory_attrs"]

        # Evaluate the queryset exactly once. Deriving the found ids from the
        # loaded rows (instead of a second values_list query) saves a round
        # trip and guarantees the found/missing split matches the rows that
        # are actually processed below.
        products = list(Product.objects.filter(item_id__in=item_ids))
        found_ids = {product.item_id for product in products}
        missing_ids = [pid for pid in item_ids if pid not in found_ids]

        results = []
        successful = 0
        failed = 0

        for product in products:
            try:
                result = self._extract_for_product(
                    product, mandatory_attrs, model, extract_additional, process_image
                )
            except Exception as e:
                # Deliberate best-effort boundary: one bad product must not
                # fail the whole batch, so the error is reported in-band.
                failed += 1
                results.append({
                    "product_id": product.item_id,
                    "error": str(e),
                })
            else:
                results.append(result)
                successful += 1

        # Requested ids with no matching row are reported as failures.
        for mid in missing_ids:
            failed += 1
            results.append({
                "product_id": mid,
                "error": "Product not found in database",
            })

        batch_result = {
            "results": results,
            "total_products": len(item_ids),
            "successful": successful,
            "failed": failed,
        }

        # Prefer the serializer's rendering; fall back to the raw dict when
        # it does not validate against the response serializer.
        response_serializer = BatchProductResponseSerializer(data=batch_result)
        if response_serializer.is_valid():
            return Response(response_serializer.data, status=status.HTTP_200_OK)
        return Response(batch_result, status=status.HTTP_200_OK)
- from rest_framework.views import APIView
- from rest_framework.response import Response
- from rest_framework import status
- from .models import Product
- from .serializers import ProductSerializer
class ProductListView(APIView):
    """
    Read-only endpoint returning every product with its details.
    """

    def get(self, request):
        """Serialize all ``Product`` rows and return them with a 200 status."""
        all_products = Product.objects.all()
        payload = ProductSerializer(all_products, many=True).data
        return Response(payload, status=status.HTTP_200_OK)
- import pandas as pd
- from rest_framework.parsers import MultiPartParser, FormParser
- from rest_framework.views import APIView
- from rest_framework.response import Response
- from rest_framework import status
- from .models import Product
- from .serializers import ProductSerializer
- # class ProductUploadExcelView(APIView):
- # """
- # POST API to upload an Excel file and add data to Product model
- # """
- # parser_classes = (MultiPartParser, FormParser)
- # def post(self, request, *args, **kwargs):
- # file_obj = request.FILES.get('file')
- # if not file_obj:
- # return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)
- # try:
- # # Read the Excel file
- # df = pd.read_excel(file_obj)
- # # Normalize column names
- # df.columns = [c.strip().lower().replace(' ', '_') for c in df.columns]
- # # Expected columns
- # expected_cols = {
- # 'item_id',
- # 'product_name',
- # 'product_long_description',
- # 'product_short_description',
- # 'product_type',
- # 'image_path'
- # }
- # if not expected_cols.issubset(df.columns):
- # return Response({
- # 'error': 'Missing required columns',
- # 'required_columns': list(expected_cols)
- # }, status=status.HTTP_400_BAD_REQUEST)
- # # Loop through rows and create Product entries
- # created_count = 0
- # for _, row in df.iterrows():
- # Product.objects.create(
- # item_id=row.get('item_id', ''),
- # product_name=row.get('product_name', ''),
- # product_long_description=row.get('product_long_description', ''),
- # product_short_description=row.get('product_short_description', ''),
- # product_type=row.get('product_type', ''),
- # image_path=row.get('image_path', ''),
- # )
- # created_count += 1
- # return Response({
- # 'message': f'Successfully uploaded {created_count} products.'
- # }, status=status.HTTP_201_CREATED)
- # except Exception as e:
- # return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
class ProductUploadExcelView(APIView):
    """
    POST API to upload an Excel file and add its rows to the Product model.

    Rows whose ``item_id`` already exists are skipped rather than duplicated.
    """
    parser_classes = (MultiPartParser, FormParser)

    def post(self, request, *args, **kwargs):
        """
        Import products from the uploaded ``file`` form field.

        Responds with 400 when no file is supplied or a required column is
        missing, 201 with created/skipped counts on success, and 500 with
        the error text when reading the file or saving a row raises.
        """
        file_obj = request.FILES.get('file')
        if not file_obj:
            return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)

        try:
            import pandas as pd

            df = pd.read_excel(file_obj)

            # Headers are not guaranteed to be strings (e.g. a numeric
            # column title), so coerce before normalising the name.
            df.columns = [str(c).strip().lower().replace(' ', '_') for c in df.columns]

            expected_cols = {
                'item_id',
                'product_name',
                'product_long_description',
                'product_short_description',
                'product_type',
                'image_path',
            }
            if not expected_cols.issubset(df.columns):
                return Response({
                    'error': 'Missing required columns',
                    # Sorted so the error payload is stable between requests.
                    'required_columns': sorted(expected_cols),
                }, status=status.HTTP_400_BAD_REQUEST)

            def cell(row, column):
                """Return the row's value for ``column``, mapping missing cells to ''."""
                value = row.get(column, '')
                # The column always exists here, so the '' default of
                # row.get never applies to an empty cell: pandas yields a
                # missing value (NaN) instead, which must not reach the model.
                return '' if pd.isna(value) else value

            created_count = 0
            skipped_count = 0
            for _, row in df.iterrows():
                item_id = cell(row, 'item_id')

                # Skip rows whose item_id is already stored (including ones
                # created earlier in this same upload).
                if Product.objects.filter(item_id=item_id).exists():
                    skipped_count += 1
                    continue

                Product.objects.create(
                    item_id=item_id,
                    product_name=cell(row, 'product_name'),
                    product_long_description=cell(row, 'product_long_description'),
                    product_short_description=cell(row, 'product_short_description'),
                    product_type=cell(row, 'product_type'),
                    image_path=cell(row, 'image_path'),
                )
                created_count += 1

            return Response({
                'message': f'Successfully uploaded {created_count} products.',
                'skipped': f'Skipped {skipped_count} duplicates.'
            }, status=status.HTTP_201_CREATED)
        except Exception as e:
            # Top-level boundary: report any parsing or database failure to
            # the client instead of letting it propagate.
            return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
|