require('dotenv').config();
const puppeteer = require('puppeteer');
const tldjs = require('tldjs');
const KeywordExtractor = require('keyword-extractor');
const Typesense = require('typesense');

class ImageCrawler {
    constructor() {
        this.config = {
            botName: process.env.BOT_NAME || 'ImageCrawlerBot/1.0',
            maxImages: parseInt(process.env.MAX_IMAGES) || 20,
            maxInnerLinksImages: parseInt(process.env.MAX_INNER_LINKS_IMAGES) || 5,
            maxRetries: parseInt(process.env.MAX_RETRIES) || 3,
            respectRobots: process.env.RESPECT_ROBOTS === 'true',
            requestDelay: parseInt(process.env.REQUEST_DELAY) || 2000,
            batchSize: parseInt(process.env.BATCH_SIZE) || 5,
            headless: process.env.HEADLESS === 'true',
            viewportWidth: parseInt(process.env.VIEWPORT_WIDTH) || 1920,
            viewportHeight: parseInt(process.env.VIEWPORT_HEIGHT) || 1080,
            userAgent: process.env.USER_AGENT,
            pageTimeout: parseInt(process.env.PAGE_TIMEOUT) || 45000,
            navigationTimeout: parseInt(process.env.NAVIGATION_TIMEOUT) || 45000,
            
            // Relaxed image quality filters
            minWidth: parseInt(process.env.MIN_IMAGE_WIDTH) || 100,
            minHeight: parseInt(process.env.MIN_IMAGE_HEIGHT) || 100,
            maxWidth: parseInt(process.env.MAX_IMAGE_WIDTH) || 4000,
            maxHeight: parseInt(process.env.MAX_IMAGE_HEIGHT) || 4000,
            minFileSize: parseInt(process.env.MIN_FILE_SIZE) || 2000, // 2KB
            maxFileSize: parseInt(process.env.MAX_FILE_SIZE) || 10485760 // 10MB
        };

        this.browser = null;
        this.results = [];
        
        // Initialize Typesense client
        this.typesenseClient = new Typesense.Client({
            'nodes': [{
                'host': process.env.TYPESENSE_HOST || 'localhost',
                'port': process.env.TYPESENSE_PORT || '8108',
                'protocol': process.env.TYPESENSE_PROTOCOL || 'http'
            }],
            'apiKey': process.env.TYPESENSE_API_KEY,
            'connectionTimeoutSeconds': parseInt(process.env.TYPESENSE_CONNECTION_TIMEOUT) || 5
        });

        // NSFW detection keywords (comprehensive and effective)
        this.nsfwKeywords = [
            // Explicit terms
            'sex', 'porn', 'xxx', 'adult', 'nude', 'naked', 'sexy', 'erotic',
            'fetish', 'hardcore', 'explicit', 'nsfw', 'mature', 'escort',
            'webcam', 'cam', 'strip', 'milf', 'amateur', 'boobs', 'ass',
            'dick', 'pussy', 'fuck', 'orgasm', 'masturbation', 'blowjob',
            // Adult site indicators
            'pornhub', 'xvideos', 'youporn', 'redtube', 'xhamster'
        ];

        // Relaxed exclusion patterns - only exclude obvious UI elements
        this.excludePatterns = [
            // Only the most obvious UI elements
            /favicon/i, /\bicon\b/i, /sprite/i, /button/i, /arrow/i,
            // Social media and tracking pixels (very small)
            /pixel/i, /beacon/i, /1x1/i,
            // Advertisement and placeholder only if explicitly named
            /placeholder/i, /blank/i, /spacer/i, /transparent/i,
            // Very obvious low quality indicators
            /loading/i, /spinner/i, /loader/i, /dot\.png/i,
            // Only exclude avatars and profiles if very small
            /avatar.*\d{1,2}x\d{1,2}/i, /profile.*\d{1,2}x\d{1,2}/i
        ];

        // Accept most common image formats including SVG
        this.includeExtensions = ['.jpg', '.jpeg', '.png', '.webp', '.bmp', '.tiff', '.svg'];
    }

    async initializeCollections() {
        try {
            console.log('Initializing Typesense collections...');

            // Define image_results collection schema
            const imageResultsSchema = {
                'name': 'image_results',
                'fields': [
                    {'name': 'id', 'type': 'string'},
                    {'name': 'title', 'type': 'string'},
                    {'name': 'url', 'type': 'string'},
                    {'name': 'imgurl', 'type': 'string'},
                    {'name': 'keywords', 'type': 'string[]'},
                    {'name': 'favicon', 'type': 'string', 'optional': true},
                    {'name': 'date', 'type': 'string'},
                    {'name': 'safesearch', 'type': 'bool'}
                ]
            };

            // Try to create image_results collection
            try {
                await this.typesenseClient.collections().create(imageResultsSchema);
                console.log('✓ Created image_results collection');
            } catch (error) {
                if (error.message.includes('already exists')) {
                    console.log('✓ Image_results collection already exists');
                } else {
                    console.warn('Warning creating image_results collection:', error.message);
                }
            }

            console.log('Typesense collections initialized successfully');
        } catch (error) {
            console.error('Failed to initialize Typesense collections:', error.message);
            throw error;
        }
    }

    async getWebsitesToCrawl() {
        try {
            console.log(`Fetching websites for image crawling...`);
            
            const searchParameters = {
                'q': '*',
                'query_by': 'title,url',
                'filter_by': 'supportImageCrawl:=true',
                'per_page': 250,
                'page': 1
            };

            console.log('Search parameters:', searchParameters);
            
            const searchResults = await this.typesenseClient.collections('websites').documents().search(searchParameters);
            
            console.log(`Found ${searchResults.hits.length} websites with supportImageCrawl enabled`);
            
            // Filter out completed websites in JavaScript
            const websitesToCrawl = searchResults.hits
                .map(hit => hit.document)
                .filter(website => {
                    // Include if imgcrawl_status doesn't exist (never crawled) or if it's not 'complete'
                    return !website.imgcrawl_status || website.imgcrawl_status !== 'complete';
                })
                .slice(0, this.config.batchSize);

            console.log(`After filtering: ${websitesToCrawl.length} websites need image crawling`);
            
            if (websitesToCrawl.length > 0) {
                console.log('\nWebsites to crawl for images:');
                websitesToCrawl.forEach((website, index) => {
                    console.log(`${index + 1}. ${website.title} (${website.url}) - Image Status: ${website.imgcrawl_status || 'never crawled'}`);
                });
            }
            
            return websitesToCrawl;
        } catch (error) {
            console.error('Error fetching websites to crawl:', error.message);
            return [];
        }
    }

    async checkImageExists(imgUrl) {
        try {
            const searchParameters = {
                'q': `"${imgUrl}"`,
                'query_by': 'imgurl',
                'per_page': 1
            };

            const searchResults = await this.typesenseClient.collections('image_results').documents().search(searchParameters);
            return searchResults.hits.length > 0;
        } catch (error) {
            console.warn(`Error checking if image exists: ${error.message}`);
            return false;
        }
    }

    extractKeywords(text, limit = 10) {
        if (!text || text.trim() === '') return [];

        try {
            const keywords = KeywordExtractor.extract(text, {
                language: 'english',
                remove_digits: true,
                return_changed_case: true,
                remove_duplicates: true
            });

            return keywords.slice(0, limit);
        } catch (error) {
            console.warn('Keyword extraction failed:', error.message);
            return [];
        }
    }

    isImageQualityAcceptable(src, width, height, naturalWidth, naturalHeight, isSvg = false) {
        // Check file extension - now more permissive
        const hasValidExtension = this.includeExtensions.some(ext => 
            src.toLowerCase().includes(ext)
        );
        
        if (!hasValidExtension) {
            return { valid: false, reason: 'Invalid file extension' };
        }

        // More relaxed exclude patterns check
        const isExcluded = this.excludePatterns.some(pattern => pattern.test(src));
        if (isExcluded) {
            return { valid: false, reason: 'Matches exclude pattern' };
        }

        // Special handling for SVG files (they might not have natural dimensions)
        if (isSvg || src.toLowerCase().includes('.svg')) {
            // For SVG, we're more lenient since they're vector graphics
            const displayWidth = width || naturalWidth || 200; // Default assumption
            const displayHeight = height || naturalHeight || 200;
            
            if (displayWidth < 50 || displayHeight < 50) {
                return { valid: false, reason: `SVG too small: ${displayWidth}x${displayHeight}` };
            }
            
            return { valid: true, reason: 'SVG quality acceptable' };
        }

        // Check dimensions for raster images
        const displayWidth = width || naturalWidth || 0;
        const displayHeight = height || naturalHeight || 0;
        
        if (displayWidth < this.config.minWidth || displayHeight < this.config.minHeight) {
            return { valid: false, reason: `Too small: ${displayWidth}x${displayHeight}` };
        }
        
        if (displayWidth > this.config.maxWidth || displayHeight > this.config.maxHeight) {
            return { valid: false, reason: `Too large: ${displayWidth}x${displayHeight}` };
        }

        // More relaxed aspect ratio check
        const aspectRatio = displayWidth / displayHeight;
        if (aspectRatio > 8 || aspectRatio < 0.125) { // Allow wider range
            return { valid: false, reason: `Bad aspect ratio: ${aspectRatio.toFixed(2)}` };
        }

        return { valid: true, reason: 'Quality acceptable' };
    }

    // Keyword-based NSFW detection (replaces TensorFlow-based detection)
    detectNSFWByKeywords(imageUrl, title, altText, pageUrl) {
        // Combine all text sources for NSFW detection
        const textToCheck = `${imageUrl} ${title} ${altText} ${pageUrl}`.toLowerCase();
        
        // Check for NSFW keywords
        const foundNSFWKeywords = this.nsfwKeywords.filter(keyword => 
            textToCheck.includes(keyword.toLowerCase())
        );

        if (foundNSFWKeywords.length > 0) {
            console.log(`🔞 NSFW content detected. Keywords found: ${foundNSFWKeywords.join(', ')}`);
            return false; // Not safe
        }

        // Additional URL-based checks
        if (textToCheck.includes('adult') || textToCheck.includes('18+') || textToCheck.includes('nsfw')) {
            console.log(`🔞 NSFW content detected in URL/text patterns`);
            return false;
        }

        return true; // Safe
    }

    /**
     * Derives a human-readable title for an image from the DOM around it.
     *
     * The callback runs inside the page via `page.evaluate`, so it can only use
     * browser globals and the element passed in. Candidates are tried in a
     * fixed priority order and the first usable one is returned:
     * alt text, title attribute, figure caption, nearby heading, nearby
     * title-like class, nearby paragraph text, data-* attributes, cleaned
     * filename, and finally the literal 'Image'.
     *
     * @param {Object} imgElement - Handle to the <img> element, passed to `page.evaluate`.
     * @param {Object} page - Page object whose `evaluate` method runs the lookup.
     * @returns {Promise<string>} The chosen title; never empty.
     */
    async extractContextualTitle(imgElement, page) {
        return page.evaluate((img) => {
            // Helper function to get text content and clean it
            // (trimmed and capped at 200 characters; '' for a missing element)
            const getCleanText = (element) => {
                if (!element) return '';
                return element.textContent.trim().substring(0, 200);
            };

            // Helper function to find the closest element by selector.
            // Walks from `element` up through its ancestors, stopping before
            // document.body. At each level it searches the descendants of the
            // current node, then of its previous sibling, then of its next sibling.
            const findClosest = (element, selector) => {
                let current = element;
                while (current && current !== document.body) {
                    const found = current.querySelector(selector) || 
                                 current.previousElementSibling?.querySelector(selector) ||
                                 current.nextElementSibling?.querySelector(selector);
                    if (found) return found;
                    current = current.parentElement;
                }
                return null;
            };

            // Priority 1: Image alt text (must be longer than 3 characters)
            if (img.alt && img.alt.trim() && img.alt.trim().length > 3) {
                return img.alt.trim();
            }

            // Priority 2: Image title attribute
            if (img.title && img.title.trim() && img.title.trim().length > 3) {
                return img.title.trim();
            }

            // Priority 3: Figure caption
            const figure = img.closest('figure');
            if (figure) {
                const caption = figure.querySelector('figcaption') || figure.querySelector('.caption');
                if (caption) {
                    const captionText = getCleanText(caption);
                    if (captionText && captionText.length > 3) return captionText;
                }
            }

            // Priority 4: Nearby headings (h1-h6)
            // Levels are tried in order, and each lookup climbs the whole ancestor
            // chain, so an h1 found further away wins over a nearer h2.
            for (let i = 1; i <= 6; i++) {
                const heading = findClosest(img, `h${i}`);
                if (heading) {
                    const headingText = getCleanText(heading);
                    if (headingText && headingText.length > 3) return headingText;
                }
            }

            // Priority 5: Parent container with common content classes
            const contentSelectors = ['.title', '.headline', '.content-title', '.post-title', '.article-title'];
            for (const selector of contentSelectors) {
                const element = findClosest(img, selector);
                if (element) {
                    const text = getCleanText(element);
                    if (text && text.length > 3) return text;
                }
            }

            // Priority 6: Nearby paragraphs or text
            // Climbs ancestors and takes the first text element longer than 10 characters.
            let current = img.parentElement;
            while (current && current !== document.body) {
                const textElements = current.querySelectorAll('p, .description, .summary');
                for (const textEl of textElements) {
                    const text = getCleanText(textEl);
                    if (text && text.length > 10) {
                        // Return first sentence or first 100 characters
                        const firstSentence = text.split('.')[0];
                        return firstSentence.length > 10 ? firstSentence : text.substring(0, 100);
                    }
                }
                current = current.parentElement;
            }

            // Priority 7: Data attributes
            const dataTitle = img.getAttribute('data-title') || 
                            img.getAttribute('data-caption') ||
                            img.getAttribute('data-description');
            if (dataTitle && dataTitle.trim().length > 3) {
                return dataTitle.trim();
            }

            // Priority 8: Filename from src (last resort)
            // Uses the last path segment up to its first dot. An unparsable src
            // throws from `new URL` and is deliberately ignored so the default
            // below is returned instead.
            try {
                const url = new URL(img.src);
                const filename = url.pathname.split('/').pop().split('.')[0];
                // Clean up filename: separators become spaces, digits are dropped
                const cleanFilename = filename
                    .replace(/[-_]/g, ' ')
                    .replace(/\d+/g, '')
                    .trim();
                
                if (cleanFilename.length > 3) {
                    return cleanFilename;
                }
            } catch (e) {}

            // Default fallback
            return 'Image';
        }, imgElement);
    }

    async extractFavicon(url, page) {
        try {
            const domain = new URL(url).origin;
            
            return await page.evaluate((domain) => {
                const favicon = document.querySelector('link[rel="icon"]')?.href ||
                              document.querySelector('link[rel="shortcut icon"]')?.href ||
                              document.querySelector('link[rel="apple-touch-icon"]')?.href ||
                              document.querySelector('link[rel="favicon"]')?.href;
                
                if (favicon) {
                    if (favicon.startsWith('//')) return `https:${favicon}`;
                    if (favicon.startsWith('/')) return `${domain}${favicon}`;
                    if (favicon.startsWith('http')) return favicon;
                    return `${domain}/${favicon}`;
                }
                
                return `${domain}/favicon.ico`;
            }, domain);
        } catch (error) {
            console.warn(`Could not extract favicon for ${url}:`, error.message);
            return '';
        }
    }

    // New method to find inner links for image crawling
    async findInnerLinks(url) {
        let page = null;
        
        try {
            console.log(`🔗 Finding inner links for image crawling: ${url}`);
            page = await this.browser.newPage();
            
            if (this.config.userAgent) {
                await page.setUserAgent(this.config.userAgent);
            }
            
            await page.goto(url, { waitUntil: 'networkidle2' });

            const links = await page.evaluate(() => {
                const anchors = Array.from(document.querySelectorAll('a[href]'));
                return anchors
                    .map(anchor => anchor.href)
                    .filter(href => href.startsWith('http'))
                    .filter((href, index, array) => array.indexOf(href) === index); // Remove duplicates
            });

            const innerLinks = links.slice(0, this.config.maxInnerLinksImages);
            console.log(`📎 Found ${innerLinks.length} inner links for image crawling`);
            return innerLinks;
        } catch (error) {
            console.warn(`Could not extract inner links from ${url}:`, error.message);
            return [];
        } finally {
            if (page) {
                await page.close();
            }
        }
    }

    // Enhanced image processing method with conversion capabilities
    /**
     * Re-encodes an image inside the page by drawing it onto a canvas.
     *
     * The callback runs in the page via `page.evaluate`. The image is scaled so
     * that neither side exceeds 800px and exported as a JPEG data URL at 0.8
     * quality. The promise always resolves (never rejects) with a result
     * object; `success: false` carries an `error` string instead of `dataUrl`.
     *
     * @param {Object} imgElement - Handle to the <img> element, passed to `page.evaluate`.
     * @param {Object} page - Page object whose `evaluate` method runs the processing.
     * @returns {Promise<Object>} `{ success, width, height, originalSrc }` plus
     *   `dataUrl` on success or `error` on failure. If `page.evaluate` itself
     *   throws, `originalSrc` is 'unknown' and both dimensions are 0.
     */
    async processImageData(imgElement, page) {
        try {
            return await page.evaluate(async (img) => {
                return new Promise((resolve) => {
                    // Create a canvas to process the image
                    const canvas = document.createElement('canvas');
                    const ctx = canvas.getContext('2d');
                    
                    const processImage = () => {
                        // Set canvas size
                        const maxSize = 800; // Max dimension for processing
                        let { width, height } = img;
                        
                        // Use natural dimensions if available
                        if (img.naturalWidth && img.naturalHeight) {
                            width = img.naturalWidth;
                            height = img.naturalHeight;
                        }
                        
                        // Scale down if too large, preserving the aspect ratio
                        if (width > maxSize || height > maxSize) {
                            const ratio = Math.min(maxSize / width, maxSize / height);
                            width *= ratio;
                            height *= ratio;
                        }
                        
                        canvas.width = width;
                        canvas.height = height;
                        
                        try {
                            // Draw the image onto canvas
                            ctx.drawImage(img, 0, 0, width, height);
                            
                            // Convert to JPEG data URL
                            const dataUrl = canvas.toDataURL('image/jpeg', 0.8);
                            
                            resolve({
                                success: true,
                                dataUrl: dataUrl,
                                width: width,
                                height: height,
                                originalSrc: img.src
                            });
                        } catch (error) {
                            // Any exception from drawing or exporting is reported as a failure result
                            resolve({
                                success: false,
                                error: error.message,
                                width: width || img.width || 0,
                                height: height || img.height || 0,
                                originalSrc: img.src
                            });
                        }
                    };
                    
                    // If image is already loaded
                    if (img.complete && img.naturalWidth !== 0) {
                        processImage();
                    } else {
                        // Wait for image to load
                        // (assigning onload/onerror replaces any handler already set on these properties)
                        img.onload = processImage;
                        img.onerror = () => {
                            resolve({
                                success: false,
                                error: 'Image failed to load',
                                width: 0,
                                height: 0,
                                originalSrc: img.src
                            });
                        };
                        
                        // Timeout after 5 seconds
                        // The timer is never cleared; if the promise has already
                        // settled, this later resolve call has no effect.
                        setTimeout(() => {
                            resolve({
                                success: false,
                                error: 'Image load timeout',
                                width: img.width || 0,
                                height: img.height || 0,
                                originalSrc: img.src
                            });
                        }, 5000);
                    }
                });
            }, imgElement);
        } catch (error) {
            // Failure of page.evaluate itself
            return {
                success: false,
                error: error.message,
                width: 0,
                height: 0,
                originalSrc: 'unknown'
            };
        }
    }

    async crawlImagesFromPage(url, favicon, totalImagesFound = 0) {
        let page = null;
        const results = [];
        
        try {
            console.log(`\n📸 Crawling images from: ${url}`);
            
            page = await this.browser.newPage();

            if (this.config.userAgent) {
                await page.setUserAgent(this.config.userAgent);
            }

            await page.setViewport({
                width: this.config.viewportWidth,
                height: this.config.viewportHeight
            });

            page.setDefaultNavigationTimeout(this.config.navigationTimeout);
            page.setDefaultTimeout(this.config.pageTimeout);

            await page.goto(url, { 
                waitUntil: 'networkidle2',
                timeout: this.config.navigationTimeout 
            });

            // Wait for images to load
            await page.waitForTimeout(3000);

            // Find all images on the page
            const images = await page.$$('img');
            console.log(`📷 Found ${images.length} images on this page`);

            let processedCount = 0;
            let validImagesFromPage = 0;

            for (const img of images) {
                if (totalImagesFound + validImagesFromPage >= this.config.maxImages) {
                    console.log(`⏹️ Reached global maximum images limit (${this.config.maxImages})`);
                    break;
                }

                try {
                    // Get basic image properties
                    const imgData = await page.evaluate((imgEl) => {
                        return {
                            src: imgEl.src,
                            alt: imgEl.alt || '',
                            title: imgEl.title || '',
                            width: imgEl.width || imgEl.clientWidth,
                            height: imgEl.height || imgEl.clientHeight,
                            naturalWidth: imgEl.naturalWidth,
                            naturalHeight: imgEl.naturalHeight,
                            complete: imgEl.complete,
                            loading: imgEl.loading
                        };
                    }, img);

                    processedCount++;

                    // Skip if image not loaded or invalid src
                    if (!imgData.src || imgData.src.startsWith('data:')) {
                        console.log(`⏭️ Skipping image: ${imgData.src ? 'Data URL' : 'No src'}`);
                        continue;
                    }

                    const isSvg = imgData.src.toLowerCase().includes('.svg');

                    // Enhanced quality check
                    const qualityCheck = this.isImageQualityAcceptable(
                        imgData.src, 
                        imgData.width, 
                        imgData.height, 
                        imgData.naturalWidth, 
                        imgData.naturalHeight,
                        isSvg
                    );

                    if (!qualityCheck.valid) {
                        console.log(`❌ Image rejected: ${qualityCheck.reason} - ${imgData.src}`);
                        continue;
                    }

                    // Check for duplicates
                    const imageExists = await this.checkImageExists(imgData.src);
                    if (imageExists) {
                        console.log(`⚠️ Duplicate image skipped: ${imgData.src}`);
                        continue;
                    }

                    // Extract contextual title
                    const contextualTitle = await this.extractContextualTitle(img, page);
                    
                    // Perform keyword-based NSFW detection
                    const isSafe = this.detectNSFWByKeywords(
                        imgData.src, 
                        contextualTitle, 
                        imgData.alt, 
                        url
                    );
                    
                    if (!isSafe) {
                        console.log(`🔞 NSFW image rejected: ${contextualTitle}`);
                        continue;
                    }
                    
                    // Extract keywords from title and alt text
                    const textForKeywords = `${contextualTitle} ${imgData.alt} ${imgData.title}`;
                    const keywords = this.extractKeywords(textForKeywords);

                    // For SVG or problematic images, try to process them
                    let processedImageData = null;
                    if (isSvg || !imgData.complete) {
                        processedImageData = await this.processImageData(img, page);
                    }

                    // Create result object
                    const result = {
                        url: url, // Use the page URL where image was found
                        imgurl: imgData.src,
                        title: contextualTitle,
                        keywords: keywords,
                        favicon: favicon,
                        safesearch: isSafe,
                        crawledAt: new Date().toISOString(),
                        dimensions: processedImageData?.success 
                            ? `${processedImageData.width}x${processedImageData.height}`
                            : `${imgData.naturalWidth || imgData.width}x${imgData.naturalHeight || imgData.height}`,
                        format: isSvg ? 'SVG' : 'Raster'
                    };

                    results.push(result);
                    validImagesFromPage++;

                    console.log(`✅ Valid image found: ${contextualTitle} (${result.dimensions}) - Safe: ${isSafe} - Format: ${result.format}`);

                } catch (error) {
                    console.error(`❌ Error processing image:`, error.message);
                }

                // Small delay between images
                await this.delay(100);
            }

            console.log(`🎯 Page crawl completed: ${validImagesFromPage} valid images from ${processedCount} processed`);
            return results;

        } catch (error) {
            console.error(`❌ Failed to crawl images from ${url}:`, error.message);
            return [];
        } finally {
            if (page) {
                await page.close();
            }
        }
    }

    async crawlImages(website) {
        const allResults = [];
        
        try {
            console.log(`\n🖼️ Starting image crawl for: ${website.title} (${website.url})`);
            
            // Get favicon from main page
            let page = await this.browser.newPage();
            await page.goto(website.url, { waitUntil: 'networkidle2' });
            const favicon = await this.extractFavicon(website.url, page);
            await page.close();

            // Crawl main page first
            const mainPageResults = await this.crawlImagesFromPage(website.url, favicon, 0);
            allResults.push(...mainPageResults);

            console.log(`📊 Main page results: ${mainPageResults.length} images`);

            // If we haven't reached the limit and inner links are enabled, crawl inner pages
            if (allResults.length < this.config.maxImages && this.config.maxInnerLinksImages > 0) {
                console.log(`🔗 Looking for inner links to crawl more images...`);
                
                const innerLinks = await this.findInnerLinks(website.url);
                
                for (const innerLink of innerLinks) {
                    if (allResults.length >= this.config.maxImages) {
                        console.log(`⏹️ Reached global maximum images limit (${this.config.maxImages}), stopping inner link crawl`);
                        break;
                    }

                    try {
                        console.log(`📄 Crawling inner page: ${innerLink}`);
                        const innerResults = await this.crawlImagesFromPage(innerLink, favicon, allResults.length);
                        allResults.push(...innerResults);

                        // Delay between inner pages
                        await this.delay(this.config.requestDelay);
                    } catch (error) {
                        console.error(`❌ Error crawling inner page ${innerLink}:`, error.message);
                    }
                }
            }

            console.log(`🏁 Total image crawl completed: ${allResults.length} valid images found across all pages`);
            return allResults;

        } catch (error) {
            console.error(`❌ Failed to crawl images from ${website.url}:`, error.message);
            return allResults; // Return whatever we managed to get
        }
    }

    async saveToImageResults(imageResults, sourceWebsiteId) {
        try {
            console.log(`💾 Saving ${imageResults.length} images to image_results collection...`);
            
            let successCount = 0;
            let errorCount = 0;

            for (const result of imageResults) {
                try {
                    const document = {
                        id: `${sourceWebsiteId}_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
                        title: result.title,
                        url: result.url,
                        imgurl: result.imgurl,
                        keywords: result.keywords,
                        favicon: result.favicon || '',
                        date: new Date(result.crawledAt).toISOString(),
                        safesearch: result.safesearch
                    };

                    await this.typesenseClient.collections('image_results').documents().create(document);
                    successCount++;
                    console.log(`✅ Saved image: ${result.title.substring(0, 50)}...`);

                } catch (saveError) {
                    errorCount++;
                    console.error(`❌ Failed to save image: ${saveError.message}`);
                }
            }

            console.log(`✓ Image save completed: ${successCount} successful, ${errorCount} failed`);
        } catch (error) {
            console.error('Error saving to image_results:', error.message);
            throw error;
        }
    }

    async updateWebsiteImageStatus(websiteId, status = 'complete') {
        try {
            console.log(`🔄 Updating website ${websiteId} image crawl status to: ${status}`);
            
            // First, update the imgcrawl_status
            await this.typesenseClient.collections('websites').documents(websiteId).update({
                imgcrawl_status: status
            });
            console.log(`✓ Updated website ${websiteId} imgcrawl_status to: ${status}`);
            
            // Now check if both imgcrawl_status and webcrawl_status are complete
            if (status === 'complete') {
                try {
                    // Retrieve the updated document to check both statuses
                    const document = await this.typesenseClient.collections('websites').documents(websiteId).retrieve();
                    
                    console.log(`🔍 Checking overall status for ${websiteId}:`, {
                        webcrawl_status: document.webcrawl_status || 'not set',
                        imgcrawl_status: document.imgcrawl_status
                    });
                    
                    // Check if both crawl types are complete
                    if (document.webcrawl_status === 'complete' && document.imgcrawl_status === 'complete') {
                        // Update the overall status to approved
                        await this.typesenseClient.collections('websites').documents(websiteId).update({
                            status: 'approved'
                        });
                        
                        console.log(`🎉 Updated website ${websiteId} overall status to: APPROVED (both crawls complete)`);
                    } else {
                        console.log(`⏳ Website ${websiteId} not fully approved yet:`, {
                            webcrawl_status: document.webcrawl_status || 'pending',
                            imgcrawl_status: document.imgcrawl_status,
                            overall_status: 'partial - waiting for other crawl type'
                        });
                        
                        // Set status to partial if only one type is complete
                        await this.typesenseClient.collections('websites').documents(websiteId).update({
                            status: 'partial'
                        });
                        
                        console.log(`📝 Updated website ${websiteId} overall status to: PARTIAL`);
                    }
                    
                } catch (retrieveError) {
                    console.error(`❌ Could not retrieve document ${websiteId} to check overall status:`, retrieveError.message);
                }
            }
            
        } catch (error) {
            console.error(`❌ Error updating website ${websiteId} image status:`, error.message);
        }
    }

    async initBrowser() {
        try {
            console.log('🚀 Initializing Chrome browser for image crawling...');
            
            const browserOptions = {
                headless: this.config.headless,
                args: [
                    '--no-sandbox',
                    '--disable-setuid-sandbox',
                    '--disable-dev-shm-usage',
                    '--disable-accelerated-2d-canvas',
                    '--no-first-run',
                    '--no-zygote',
                    '--disable-gpu',
                    '--disable-web-security',
                    '--disable-features=VizDisplayCompositor',
                    '--disable-images=false', // Ensure images are loaded
                    '--disable-lazy-loading' // Load all images immediately
                ]
            };

            this.browser = await puppeteer.launch(browserOptions);
            console.log('✓ Chrome browser initialized successfully');
        } catch (error) {
            console.error('❌ Failed to initialize Chrome browser:', error.message);
            throw error;
        }
    }

    delay(ms) {
        return new Promise(resolve => setTimeout(resolve, ms));
    }

    async runImageCrawlLoop() {
        try {
            await this.initializeCollections();
            await this.initBrowser();

            let processedBatches = 0;
            let totalProcessed = 0;

            console.log(`📋 Modern Image Crawler Configuration:`);
            console.log(`   Max Images Per Website: ${this.config.maxImages}`);
            console.log(`   Max Inner Links Per Website: ${this.config.maxInnerLinksImages}`);
            console.log(`   Batch Size: ${this.config.batchSize}`);
            console.log(`   NSFW Detection: Keyword-based (Fast & Reliable)`);
            console.log(`   Supported Formats: JPG, PNG, WebP, BMP, TIFF, SVG`);

            while (true) {
                console.log('\n' + '='.repeat(60));
                console.log(`🖼️ Starting image crawl batch ${processedBatches + 1}...`);
                console.log('='.repeat(60));

                const websitesToCrawl = await this.getWebsitesToCrawl();
                
                if (websitesToCrawl.length === 0) {
                    console.log('✅ No more websites to crawl for images. Exiting...');
                    break;
                }

                for (const website of websitesToCrawl) {
                    try {
                        console.log(`\n📸 Processing website for images: ${website.title} (${website.url})`);
                        
                        const imageResults = await this.crawlImages(website);
                        
                        if (imageResults.length > 0) {
                            await this.saveToImageResults(imageResults, website.id);
                        } else {
                            console.log('📝 No valid images found on this website');
                        }
                        
                        await this.updateWebsiteImageStatus(website.id, 'complete');
                        
                        totalProcessed++;
                        console.log(`✅ Completed image processing for website: ${website.title} (${imageResults.length} images saved)`);
                        
                        // Delay between websites
                        await this.delay(this.config.requestDelay);
                        
                    } catch (error) {
                        console.error(`❌ Failed to process images for website ${website.url}:`, error.message);
                        
                        try {
                            await this.updateWebsiteImageStatus(website.id, 'failed');
                        } catch (updateError) {
                            console.error(`❌ Failed to update image status for ${website.id}:`, updateError.message);
                        }
                    }
                }

                processedBatches++;
                console.log(`\n✅ Completed image crawl batch ${processedBatches}. Total websites processed: ${totalProcessed}`);
                
                // Short delay between batches
                await this.delay(3000);
            }

            console.log('\n' + '='.repeat(60));
            console.log('🎉 IMAGE CRAWL SUMMARY');
            console.log('='.repeat(60));
            console.log(`Total batches processed: ${processedBatches}`);
            console.log(`Total websites processed: ${totalProcessed}`);
            console.log('All websites have been crawled for images!');

        } catch (error) {
            console.error('❌ Image crawl loop failed:', error.message);
            throw error;
        } finally {
            if (this.browser) {
                await this.browser.close();
                console.log('🔒 Browser closed.');
            }
        }
    }
}

// Main execution
/**
 * Script entry point: build an ImageCrawler and run its crawl loop.
 *
 * If the crawl loop rejects, the failure message is logged and the process
 * exits with code 1. An error thrown by the constructor is not handled here.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const imageCrawler = new ImageCrawler();

    // Report the failure and terminate with a non-zero exit code.
    const exitWithFailure = (failure) => {
        console.error('❌ Image crawler execution failed:', failure.message);
        process.exit(1);
    };

    try {
        await imageCrawler.runImageCrawlLoop();
    } catch (failure) {
        exitWithFailure(failure);
    }
}

// Start the crawler only when this file is executed directly,
// not when it is loaded through require() by another module.
const launchedDirectly = require.main === module;
if (launchedDirectly) {
    main().catch((error) => console.error(error));
}

module.exports = ImageCrawler;