# harshit.pathak / content_quality_tool
							# #  #==================== views.py ====================
# # from rest_framework.views import APIView
# # from rest_framework.response import Response
# # from rest_framework import status
# # from .serializers import (
# #     ProductAttributeRequestSerializer,
# #     ProductAttributeResponseSerializer
# # )
# # from .services import ProductAttributeService


# # class ExtractProductAttributesView(APIView):
# #     """
# #     API endpoint to extract product attributes using Groq LLM.
    
# #     POST /api/extract-attributes/
    
# #     Request Body:
# #     {
# #         "title": "Product title (optional)",
# #         "short_desc": "Short description (optional)",
# #         "long_desc": "Long description (optional)",
# #         "mandatory_attrs": {
# #             "Attribute1": ["value1", "value2", "value3"],
# #             "Attribute2": ["valueA", "valueB"]
# #         },
# #         "model": "llama-3.1-8b-instant (optional)",
# #         "extract_additional": true (optional, default: true)
# #     }
    
# #     Response:
# #     {
# #         "mandatory": {
# #             "Attribute1": "value1",
# #             "Attribute2": "valueA"
# #         },
# #         "additional": {
# #             "Color": "Blue",
# #             "Brand": "Example"
# #         }
# #     }
# #     """

# #     def post(self, request):
# #         # Validate request data
# #         serializer = ProductAttributeRequestSerializer(data=request.data)
# #         if not serializer.is_valid():
# #             return Response(
# #                 {"error": serializer.errors},
# #                 status=status.HTTP_400_BAD_REQUEST
# #             )

# #         validated_data = serializer.validated_data

# #         # Combine product text
# #         product_text = ProductAttributeService.combine_product_text(
# #             title=validated_data.get('title'),
# #             short_desc=validated_data.get('short_desc'),
# #             long_desc=validated_data.get('long_desc')
# #         )

# #         # Extract attributes
# #         result = ProductAttributeService.extract_attributes(
# #             product_text=product_text,
# #             mandatory_attrs=validated_data['mandatory_attrs'],
# #             model=validated_data.get('model'),
# #             extract_additional=validated_data.get('extract_additional', True)
# #         )

# #         # Return response
# #         response_serializer = ProductAttributeResponseSerializer(data=result)
# #         if response_serializer.is_valid():
# #             return Response(response_serializer.data, status=status.HTTP_200_OK)
        
# #         return Response(result, status=status.HTTP_200_OK)


# from rest_framework.views import APIView
# from rest_framework.response import Response
# from rest_framework import status
# from .serializers import (
#     SingleProductRequestSerializer,
#     BatchProductRequestSerializer,
#     ProductAttributeResultSerializer,
#     BatchProductResponseSerializer
# )
# from .services import ProductAttributeService


# class ExtractProductAttributesView(APIView):
#     """
#     API endpoint to extract product attributes for a single product.
    
#     POST /api/extract-attributes/
    
#     Request Body:
#     {
#         "title": "Product title (optional)",
#         "short_desc": "Short description (optional)",
#         "long_desc": "Long description (optional)",
#         "mandatory_attrs": {
#             "Attribute1": ["value1", "value2", "value3"],
#             "Attribute2": ["valueA", "valueB"]
#         },
#         "model": "llama-3.1-8b-instant (optional)",
#         "extract_additional": true (optional, default: true)
#     }
#     """

#     def post(self, request):
#         serializer = SingleProductRequestSerializer(data=request.data)
#         if not serializer.is_valid():
#             return Response(
#                 {"error": serializer.errors},
#                 status=status.HTTP_400_BAD_REQUEST
#             )

#         validated_data = serializer.validated_data

#         product_text = ProductAttributeService.combine_product_text(
#             title=validated_data.get('title'),
#             short_desc=validated_data.get('short_desc'),
#             long_desc=validated_data.get('long_desc')
#         )

#         result = ProductAttributeService.extract_attributes(
#             product_text=product_text,
#             mandatory_attrs=validated_data['mandatory_attrs'],
#             model=validated_data.get('model'),
#             extract_additional=validated_data.get('extract_additional', True)
#         )

#         response_serializer = ProductAttributeResultSerializer(data=result)
#         if response_serializer.is_valid():
#             return Response(response_serializer.data, status=status.HTTP_200_OK)
        
#         return Response(result, status=status.HTTP_200_OK)


# class BatchExtractProductAttributesView(APIView):
#     """
#     API endpoint to extract product attributes for multiple products in batch.
    
#     POST /api/batch-extract-attributes/
    
#     Request Body:
#     {
#         "products": [
#             {
#                 "product_id": "prod_001",
#                 "title": "Product 1 title",
#                 "short_desc": "Short description",
#                 "long_desc": "Long description"
#             },
#             {
#                 "product_id": "prod_002",
#                 "title": "Product 2 title",
#                 "short_desc": "Short description"
#             }
#         ],
#         "mandatory_attrs": {
#             "Attribute1": ["value1", "value2", "value3"],
#             "Attribute2": ["valueA", "valueB"]
#         },
#         "model": "llama-3.1-8b-instant (optional)",
#         "extract_additional": true (optional, default: true)
#     }
    
#     Response:
#     {
#         "results": [
#             {
#                 "product_id": "prod_001",
#                 "mandatory": {...},
#                 "additional": {...}
#             },
#             {
#                 "product_id": "prod_002",
#                 "mandatory": {...},
#                 "additional": {...}
#             }
#         ],
#         "total_products": 2,
#         "successful": 2,
#         "failed": 0
#     }
#     """

#     def post(self, request):
#         serializer = BatchProductRequestSerializer(data=request.data)
#         if not serializer.is_valid():
#             return Response(
#                 {"error": serializer.errors},
#                 status=status.HTTP_400_BAD_REQUEST
#             )

#         validated_data = serializer.validated_data

#         # Extract attributes for all products in batch
#         result = ProductAttributeService.extract_attributes_batch(
#             products=validated_data['products'],
#             mandatory_attrs=validated_data['mandatory_attrs'],
#             model=validated_data.get('model'),
#             extract_additional=validated_data.get('extract_additional', True)
#         )

#         response_serializer = BatchProductResponseSerializer(data=result)
#         if response_serializer.is_valid():
#             return Response(response_serializer.data, status=status.HTTP_200_OK)
        
#         return Response(result, status=status.HTTP_200_OK)


# ==================== views.py ====================
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status
from .serializers import (
    SingleProductRequestSerializer,
    BatchProductRequestSerializer,
    ProductAttributeResultSerializer,
    BatchProductResponseSerializer
)
from .services import ProductAttributeService
from .ocr_service import OCRService


class ExtractProductAttributesView(APIView):
    """
    Single-product attribute extraction endpoint.

    Besides the textual fields (title, short and long description), the
    request may carry an image URL; text detected in that image by the OCR
    service is merged into the product text before attributes are extracted.
    """

    def post(self, request):
        """
        Validate the payload, optionally run OCR, and return the attributes.

        Responds with HTTP 400 and the serializer errors when the payload is
        invalid. Otherwise responds with HTTP 200, using the response
        serializer's output when it validates and the raw result dict when it
        does not.
        """
        request_serializer = SingleProductRequestSerializer(data=request.data)
        if not request_serializer.is_valid():
            return Response(
                {"error": request_serializer.errors},
                status=status.HTTP_400_BAD_REQUEST,
            )

        payload = request_serializer.validated_data
        ocr_results, ocr_text = self._run_ocr(payload)

        # Merge every available text source into a single prompt input.
        combined_text = ProductAttributeService.combine_product_text(
            title=payload.get('title'),
            short_desc=payload.get('short_desc'),
            long_desc=payload.get('long_desc'),
            ocr_text=ocr_text,
        )

        result = ProductAttributeService.extract_attributes(
            product_text=combined_text,
            mandatory_attrs=payload['mandatory_attrs'],
            model=payload.get('model'),
            extract_additional=payload.get('extract_additional', True),
        )

        # Only attach the OCR section when the OCR step produced something.
        if ocr_results:
            result['ocr_results'] = ocr_results

        result_serializer = ProductAttributeResultSerializer(data=result)
        body = result_serializer.data if result_serializer.is_valid() else result
        return Response(body, status=status.HTTP_200_OK)

    def _run_ocr(self, payload):
        """
        Run OCR for the payload's image and return ``(ocr_results, ocr_text)``.

        Both values are ``None`` when image processing is disabled or no image
        URL was supplied. ``ocr_text`` stays ``None`` when OCR returned nothing
        or detected no text.
        """
        if not (payload.get('process_image', True) and payload.get('image_url')):
            return None, None

        ocr_results = OCRService().process_image(payload['image_url'])
        if not (ocr_results and ocr_results.get('detected_text')):
            return ocr_results, None

        # Attach attributes derived from the OCR output to the OCR results.
        ocr_results['extracted_attributes'] = (
            ProductAttributeService.extract_attributes_from_ocr(
                ocr_results,
                payload.get('model'),
            )
        )

        # One line per detected fragment, annotated with its confidence.
        fragments = [
            f"{item['text']} (confidence: {item['confidence']:.2f})"
            for item in ocr_results['detected_text']
        ]
        return ocr_results, "\n".join(fragments)


class BatchExtractProductAttributesView(APIView):
    """
    Batch attribute extraction endpoint.

    Takes a list of products plus the shared mandatory attributes and hands
    the whole batch to ``ProductAttributeService.extract_attributes_batch``.
    The ``process_image`` flag is forwarded to the service along with the
    other options.
    """

    def post(self, request):
        """
        Validate the batch payload and return the batch extraction result.

        Responds with HTTP 400 and the serializer errors when the payload is
        invalid. Otherwise responds with HTTP 200, using the response
        serializer's output when it validates and the raw result otherwise.
        """
        incoming = BatchProductRequestSerializer(data=request.data)
        if not incoming.is_valid():
            return Response(
                {"error": incoming.errors},
                status=status.HTTP_400_BAD_REQUEST,
            )

        params = incoming.validated_data

        # A single service call handles every product in the request.
        batch_result = ProductAttributeService.extract_attributes_batch(
            products=params['products'],
            mandatory_attrs=params['mandatory_attrs'],
            model=params.get('model'),
            extract_additional=params.get('extract_additional', True),
            process_image=params.get('process_image', True),
        )

        outgoing = BatchProductResponseSerializer(data=batch_result)
        if not outgoing.is_valid():
            # Fall back to the unserialized result rather than failing.
            return Response(batch_result, status=status.HTTP_200_OK)

        return Response(outgoing.data, status=status.HTTP_200_OK)


from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status
from .models import Product
from .serializers import ProductSerializer

class ProductListView(APIView):
    """
    Read-only endpoint that returns every Product, serialized with
    ProductSerializer.
    """

    def get(self, request):
        """Return all products as a serialized list with HTTP 200."""
        payload = ProductSerializer(Product.objects.all(), many=True).data
        return Response(payload, status=status.HTTP_200_OK)


import pandas as pd
from rest_framework.parsers import MultiPartParser, FormParser
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import status
from .models import Product
from .serializers import ProductSerializer


# class ProductUploadExcelView(APIView):
#     """
#     POST API to upload an Excel file and add data to Product model
#     """
#     parser_classes = (MultiPartParser, FormParser)

#     def post(self, request, *args, **kwargs):
#         file_obj = request.FILES.get('file')
#         if not file_obj:
#             return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)

#         try:
#             # Read the Excel file
#             df = pd.read_excel(file_obj)

#             # Normalize column names
#             df.columns = [c.strip().lower().replace(' ', '_') for c in df.columns]

#             # Expected columns
#             expected_cols = {
#                 'item_id',
#                 'product_name',
#                 'product_long_description',
#                 'product_short_description',
#                 'product_type',
#                 'image_path'
#             }

#             if not expected_cols.issubset(df.columns):
#                 return Response({
#                     'error': 'Missing required columns',
#                     'required_columns': list(expected_cols)
#                 }, status=status.HTTP_400_BAD_REQUEST)

#             # Loop through rows and create Product entries
#             created_count = 0
#             for _, row in df.iterrows():
#                 Product.objects.create(
#                     item_id=row.get('item_id', ''),
#                     product_name=row.get('product_name', ''),
#                     product_long_description=row.get('product_long_description', ''),
#                     product_short_description=row.get('product_short_description', ''),
#                     product_type=row.get('product_type', ''),
#                     image_path=row.get('image_path', ''),
#                 )
#                 created_count += 1

#             return Response({
#                 'message': f'Successfully uploaded {created_count} products.'
#             }, status=status.HTTP_201_CREATED)

#         except Exception as e:
#             return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)


class ProductUploadExcelView(APIView):
    """
    POST API to upload an Excel file and add its rows to the Product model.

    The workbook is read from the multipart field ``file``. Column headers are
    normalised (trimmed, lower-cased, spaces replaced by underscores) and must
    include every name in ``_REQUIRED_COLUMNS``. Rows whose ``item_id`` already
    exists in the database are skipped, not updated.

    Responses:
        400 - no file was provided, or required columns are missing.
        201 - upload processed; the body reports created and skipped counts.
        500 - any exception raised while reading or importing the file.
    """
    parser_classes = (MultiPartParser, FormParser)

    # A tuple, not a set, so the order reported in error responses is stable.
    _REQUIRED_COLUMNS = (
        'item_id',
        'product_name',
        'product_long_description',
        'product_short_description',
        'product_type',
        'image_path',
    )

    @staticmethod
    def _cell(row, column):
        """
        Return the value of ``column`` in ``row``, mapping missing cells to ''.

        pandas represents empty Excel cells as NaN/NaT/None. Passing those to
        the ORM would persist a missing marker instead of an empty value, so
        they are replaced with the empty string here.
        """
        value = row.get(column, '')
        return '' if pd.isna(value) else value

    def post(self, request, *args, **kwargs):
        """
        Import products from the uploaded Excel file, skipping duplicates.

        Relies on the module-level ``import pandas as pd``.
        """
        file_obj = request.FILES.get('file')
        if not file_obj:
            return Response({'error': 'No file provided'}, status=status.HTTP_400_BAD_REQUEST)

        try:
            df = pd.read_excel(file_obj)

            # Headers are not guaranteed to be strings (numeric or date
            # headers are possible), so coerce before normalising.
            df.columns = [str(c).strip().lower().replace(' ', '_') for c in df.columns]

            missing = [c for c in self._REQUIRED_COLUMNS if c not in df.columns]
            if missing:
                return Response({
                    'error': 'Missing required columns',
                    'required_columns': list(self._REQUIRED_COLUMNS),
                    'missing_columns': missing,
                }, status=status.HTTP_400_BAD_REQUEST)

            created_count = 0
            skipped_count = 0

            for _, row in df.iterrows():
                item_id = self._cell(row, 'item_id')

                # Existing items are left untouched. Because the check runs per
                # row, a repeated item_id later in the same file is skipped too.
                if Product.objects.filter(item_id=item_id).exists():
                    skipped_count += 1
                    continue

                Product.objects.create(
                    item_id=item_id,
                    product_name=self._cell(row, 'product_name'),
                    product_long_description=self._cell(row, 'product_long_description'),
                    product_short_description=self._cell(row, 'product_short_description'),
                    product_type=self._cell(row, 'product_type'),
                    image_path=self._cell(row, 'image_path'),
                )
                created_count += 1

            return Response({
                'message': f'Successfully uploaded {created_count} products.',
                'skipped': f'Skipped {skipped_count} duplicates.'
            }, status=status.HTTP_201_CREATED)

        except Exception as e:
            # Top-level boundary: any read/import failure becomes a 500.
            # NOTE(review): str(e) may expose internal details to the client;
            # consider logging it and returning a generic message instead.
            return Response({'error': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)