<?php

namespace App\Services;

use App\DTO\CropResult;
use Google\Cloud\Vision\V1\Client\ImageAnnotatorClient;
use Google\Cloud\Vision\V1\Image;
use Illuminate\Support\Facades\Log;
use Exception;
use Imagick;

class OcrService
{
    protected ?ImageAnnotatorClient $visionClient = null;

    /**
     * Build the Google Cloud Vision client from the configured credentials file.
     *
     * The path is read from the `services.google_vision.credentials_path` config
     * key. A path that is not absolute is resolved against the application base
     * path before the existence check.
     *
     * @throws Exception If the path is not configured or no file exists at the resolved path.
     */
    public function __construct()
    {
        $credentialsPath = config('services.google_vision.credentials_path');

        // Fail with a clear message instead of passing null/'' to string functions
        if (!is_string($credentialsPath) || trim($credentialsPath) === '') {
            throw new Exception('Google Vision credentials path is not configured (services.google_vision.credentials_path).');
        }

        // Treat POSIX roots, UNC paths and Windows drive-letter paths as absolute
        $isAbsolute = str_starts_with($credentialsPath, '/')
            || str_starts_with($credentialsPath, '\\\\')
            || preg_match('/^[A-Za-z]:[\\\\\/]/', $credentialsPath) === 1;

        // Convert relative path to absolute path
        if (!$isAbsolute) {
            $credentialsPath = base_path($credentialsPath);
        }

        if (!file_exists($credentialsPath)) {
            throw new Exception("Google Vision credentials file not found at: {$credentialsPath}");
        }

        $this->visionClient = new ImageAnnotatorClient([
            'credentials' => $credentialsPath,
        ]);
    }

    /**
     * Run OCR on a single image file and return the text that was found.
     *
     * Public entry point; the work is delegated to extractWithGoogleVision().
     *
     * @param string $imagePath Path of the image to process.
     */
    public function extractText(string $imagePath): string
    {
        $recognisedText = $this->extractWithGoogleVision($imagePath);

        return $recognisedText;
    }

    /**
     * Send an image to Google Cloud Vision (DOCUMENT_TEXT_DETECTION) and return its text.
     *
     * @param string $imagePath Path of the image file whose bytes are uploaded.
     * @return string Description of the first text annotation, or '' when the API
     *                returns no responses or no text annotations.
     * @throws Exception If the image cannot be read or the API reports an error for it.
     */
    protected function extractWithGoogleVision(string $imagePath): string
    {
        $imageContent = file_get_contents($imagePath);
        if ($imageContent === false) {
            // Do not upload an empty payload; surface the real cause instead
            throw new Exception("Unable to read image file: {$imagePath}");
        }

        // Create the image object
        $image = new \Google\Cloud\Vision\V1\Image();
        $image->setContent($imageContent);

        // Create the feature
        $feature = new \Google\Cloud\Vision\V1\Feature();
        $feature->setType(\Google\Cloud\Vision\V1\Feature\Type::DOCUMENT_TEXT_DETECTION);

        // Create the request
        $request = new \Google\Cloud\Vision\V1\AnnotateImageRequest();
        $request->setImage($image);
        $request->setFeatures([$feature]);

        // Create batch request
        $batchRequest = new \Google\Cloud\Vision\V1\BatchAnnotateImagesRequest();
        $batchRequest->setRequests([$request]);

        // Call the API
        $response = $this->visionClient->batchAnnotateImages($batchRequest);

        // Process response
        $responses = $response->getResponses();
        if ($responses->count() === 0) {
            return '';
        }

        // A per-image error must not be mistaken for "no text found"; this mirrors
        // the handling in extractStructuredWithGoogleVision()
        $firstResponse = $responses[0];
        if ($firstResponse->hasError()) {
            $error = $firstResponse->getError();
            Log::error('Google Vision response contains error', [
                'code' => $error->getCode(),
                'message' => $error->getMessage(),
            ]);
            throw new Exception('Google Vision API error: ' . $error->getMessage());
        }

        $texts = $firstResponse->getTextAnnotations();
        if ($texts->count() === 0) {
            return '';
        }

        // First annotation contains the entire text
        return $texts[0]->getDescription();
    }
    
    /**
     * Run OCR over a list of already-cropped images.
     *
     * Each crop's `text` property is set to the OCR output for the file at its
     * `path`. When OCR throws for a crop, the failure is logged and that crop's
     * text is set to an empty string, so one bad crop does not abort the batch.
     *
     * @param CropResult[] $crops Array of CropResult objects
     * @return CropResult[] The same objects, in input order, re-indexed from 0
     */
    public function ocrCrops(array $crops): array
    {
        $processed = [];

        foreach ($crops as $item) {
            try {
                $item->text = $this->extractWithGoogleVision($item->path);
            } catch (\Throwable $failure) {
                Log::error('OCR crop failed', ['path' => $item->path, 'error' => $failure->getMessage()]);
                $item->text = '';
            }

            $processed[] = $item;
        }

        return $processed;
    }

    /**
     * Return word-level OCR data, including bounding boxes, for an image.
     *
     * Public entry point; the work is delegated to extractStructuredWithGoogleVision().
     *
     * @param string $imagePath Path of the image to process.
     */
    public function extractStructuredData(string $imagePath): array
    {
        $wordBoxes = $this->extractStructuredWithGoogleVision($imagePath);

        return $wordBoxes;
    }

    /**
     * Extract per-word text and bounding boxes from Google Vision.
     *
     * Runs DOCUMENT_TEXT_DETECTION on the image. The first text annotation (the
     * full text) is only logged; every following annotation becomes one entry in
     * the result. Annotations that carry no bounding polygon are skipped.
     *
     * @param string $imagePath Path of the image file whose bytes are uploaded.
     * @return array<int, array<string, mixed>> One entry per annotation with keys
     *         text, left, top, width, height, conf, right, bottom, center_x,
     *         center_y and block_num. Empty when there are no responses or no text.
     * @throws Exception If the image cannot be read, the API call fails, or the
     *                   response for the image contains an error.
     */
    protected function extractStructuredWithGoogleVision(string $imagePath): array
    {
        try {
            $imageContent = file_get_contents($imagePath);
            if ($imageContent === false) {
                throw new Exception("Unable to read image file: {$imagePath}");
            }

            // Create the image object
            $image = new \Google\Cloud\Vision\V1\Image();
            $image->setContent($imageContent);

            // Create the feature
            $feature = new \Google\Cloud\Vision\V1\Feature();
            $feature->setType(\Google\Cloud\Vision\V1\Feature\Type::DOCUMENT_TEXT_DETECTION);

            // Create the request
            $request = new \Google\Cloud\Vision\V1\AnnotateImageRequest();
            $request->setImage($image);
            $request->setFeatures([$feature]);

            // Create batch request
            $batchRequest = new \Google\Cloud\Vision\V1\BatchAnnotateImagesRequest();
            $batchRequest->setRequests([$request]);

            // Call the API
            $response = $this->visionClient->batchAnnotateImages($batchRequest);
        } catch (Exception $e) {
            Log::error('Google Vision API call failed', [
                'error' => $e->getMessage(),
                'file' => $imagePath
            ]);
            throw $e;
        }

        // Process response
        $responses = $response->getResponses();
        if ($responses->count() === 0) {
            Log::warning('Google Vision returned no responses');
            return [];
        }

        // Check for errors in response
        $firstResponse = $responses[0];
        if ($firstResponse->hasError()) {
            $error = $firstResponse->getError();
            Log::error('Google Vision response contains error', [
                'code' => $error->getCode(),
                'message' => $error->getMessage()
            ]);
            throw new Exception('Google Vision API error: ' . $error->getMessage());
        }

        $annotations = $firstResponse->getTextAnnotations();

        // If no text detected, return empty
        if (count($annotations) === 0) {
            Log::info('Google Vision detected no text in image');
            return [];
        }

        // Log the full text (first annotation) to see what Google Vision detected.
        // mb_strcut keeps the preview within 500 bytes without splitting a
        // multibyte character, so the log context stays valid UTF-8.
        $fullText = $annotations[0]->getDescription();
        Log::debug('Google Vision full text', ['length' => strlen($fullText), 'preview' => mb_strcut($fullText, 0, 500, 'UTF-8')]);
        Log::debug('Google Vision full text raw', ['text' => $fullText]);

        $words = [];

        // Skip first annotation (full text) and process individual words
        foreach ($annotations as $i => $annotation) {
            if ($i === 0) {
                continue;
            }

            // An annotation without a polygon has no usable geometry
            $vertices = $annotation->getBoundingPoly()?->getVertices();
            if ($vertices === null || count($vertices) === 0) {
                continue;
            }

            $xs = [];
            $ys = [];
            foreach ($vertices as $vertex) {
                $xs[] = $vertex->getX();
                $ys[] = $vertex->getY();
            }

            // Axis-aligned box from the true extents of the polygon
            $left = min($xs);
            $top = min($ys);
            $right = max($xs);
            $bottom = max($ys);

            $width = $right - $left;
            $height = $bottom - $top;

            $words[] = [
                'text' => $this->normalizeText($annotation->getDescription()),
                'left' => $left,
                'top' => $top,
                'width' => $width,
                'height' => $height,
                'conf' => 99, // Google Vision doesn't provide confidence per word
                'right' => $right,
                'bottom' => $bottom,
                'center_x' => $left + ($width / 2),
                'center_y' => $top + ($height / 2),
                'block_num' => 0
            ];
        }

        Log::info('Google Vision extraction completed', [
            'words_extracted' => count($words),
            'image' => basename($imagePath)
        ]);

        return $words;
    }

    /**
     * Tidy a single OCR token.
     *
     * Trims surrounding whitespace, joins tokens that were recognised as
     * letter-spaced characters ("A B C" becomes "ABC"), and replaces em/en
     * dashes with a plain hyphen.
     */
    private function normalizeText(string $text): string
    {
        $token = trim($text);

        // Only collapse when the whole token is single characters separated by
        // one whitespace each (at least three characters in total)
        $isLetterSpaced = preg_match('/^(?:[A-Z0-9]\s){2,}[A-Z0-9]$/i', $token) === 1;
        if ($isLetterSpaced) {
            $token = preg_replace('/\s+/', '', $token);
        }

        // Normalize common OCR dashes
        return str_replace(['—', '–'], '-', $token);
    }

    /**
     * Close the Vision client, if one was created, when the service is destroyed.
     */
    public function __destruct()
    {
        // Null when the constructor threw before the client was assigned
        $this->visionClient?->close();
    }

    /**
     * Crop the source image into a grid of image files based on dynamic layout analysis.
     * This method uses Tesseract's TSV output to locate voter boxes.
     *
     * The boxes returned by TsvLayoutParser::findVoterCardBoxes() are consumed in
     * order and labelled row by row (1-based), $columns boxes per row. Each box
     * is read as an array with x, y, width and height keys.
     *
     * @param string $imagePath Path of the page image.
     * @param int $columns Number of columns; passed to the parser and used for labelling.
     * @param int $rows Number of rows; passed to the parser and used for labelling.
     * @return CropResult[] Array of CropResult objects; empty when the TSV could
     *                      not be produced or read, or no boxes were found.
     * @throws \InvalidArgumentException If the image does not exist.
     */
    public function cropVoterBoxes(string $imagePath, int $columns = 3, int $rows = 10): array
    {
        if (!file_exists($imagePath)) {
            throw new \InvalidArgumentException("Image not found: {$imagePath}");
        }

        // 1. Run Tesseract with TSV output to get layout data.
        $tsvPath = $this->runTesseractTsv($imagePath);
        if (!$tsvPath || !file_exists($tsvPath)) {
            Log::error('TSV file was not created or is empty.');
            return [];
        }

        // Read the TSV and delete it straight away so that neither an early
        // return nor a cropping failure below can leave the temp file behind.
        $tsvContent = file_get_contents($tsvPath);
        @unlink($tsvPath);
        if ($tsvContent === false) {
            Log::error('TSV file could not be read.', ['tsv' => $tsvPath]);
            return [];
        }

        // 2. Parse the TSV to find the bounding boxes of voter cards.
        $layoutParser = new TsvLayoutParser();
        $voterBoxes = $layoutParser->findVoterCardBoxes($tsvContent, $columns, $rows);

        if (empty($voterBoxes)) {
            Log::warning('No voter boxes found from TSV layout analysis.', ['image' => $imagePath]);
            return [];
        }

        // 3. Crop the image based on the identified boxes.
        $tmpDir = storage_path('app/ocr_crops');
        if (!is_dir($tmpDir)) {
            mkdir($tmpDir, 0755, true);
        }

        $results = [];
        $boxIndex = 0;
        for ($r = 1; $r <= $rows; $r++) {
            for ($c = 1; $c <= $columns; $c++) {
                // The index only advances after a crop, so once a box is
                // missing no later cell can be filled either.
                if (!isset($voterBoxes[$boxIndex])) {
                    break 2;
                }

                $box = $voterBoxes[$boxIndex];
                $tmpPath = $tmpDir . DIRECTORY_SEPARATOR . sprintf('crop_r%d_c%d_%s.png', $r, $c, uniqid());

                $this->cropImage($imagePath, $tmpPath, $box['x'], $box['y'], $box['width'], $box['height']);

                $results[] = new CropResult(
                    row: $r,
                    col: $c,
                    path: $tmpPath,
                    x: $box['x'],
                    y: $box['y'],
                    w: $box['width'],
                    h: $box['height']
                );
                $boxIndex++;
            }
        }

        return $results;
    }

    /**
     * Runs Tesseract on an image and generates a TSV file with layout information.
     *
     * The file is written under storage/app/tsv_output with a unique name derived
     * from the image's base name. The caller is responsible for deleting it.
     *
     * @param string $imagePath
     * @return string|null The path to the generated TSV file, or null on failure.
     */
    public function runTesseractTsv(string $imagePath): ?string
    {
        $baseDir = storage_path('app/tsv_output');
        // Re-check is_dir() after a failed mkdir(): a concurrent worker may have
        // created the directory between the first check and the mkdir call.
        if (!is_dir($baseDir) && !@mkdir($baseDir, 0755, true) && !is_dir($baseDir)) {
            Log::error('Unable to create TSV output directory.', ['dir' => $baseDir]);
            return null;
        }

        $baseName = pathinfo($imagePath, PATHINFO_FILENAME);
        $tsvOutputBase = $baseDir . '/' . $baseName . '_' . uniqid();
        $tsvPath = $tsvOutputBase . '.tsv';

        // The `tesseract` command requires the output path *without* the extension.
        // stderr is redirected into stdout so Tesseract's diagnostics end up in
        // $output and therefore in the failure log below.
        $command = sprintf(
            'tesseract %s %s --psm 6 tsv 2>&1',
            escapeshellarg($imagePath),
            escapeshellarg($tsvOutputBase)
        );

        $output = [];
        $returnCode = 0;
        exec($command, $output, $returnCode);

        if ($returnCode !== 0 || !file_exists($tsvPath)) {
            Log::error('Tesseract TSV generation failed.', [
                'command' => $command,
                'return_code' => $returnCode,
                'output' => $output,
            ]);

            // Do not leave a partial file behind after a failed run
            if (file_exists($tsvPath)) {
                @unlink($tsvPath);
            }

            return null;
        }

        return $tsvPath;
    }

    /**
     * Crop a rectangle out of an image and write it to $destPath as PNG.
     *
     * Uses Imagick when the extension is loaded, otherwise GD. In the GD path the
     * destination is pre-filled with white, so any part of the rectangle that
     * lies outside the source stays white.
     *
     * @param string $sourcePath Image to read.
     * @param string $destPath File to write.
     * @param int $x Left edge of the rectangle in source pixels.
     * @param int $y Top edge of the rectangle in source pixels.
     * @param int $w Rectangle width in pixels.
     * @param int $h Rectangle height in pixels.
     * @throws \RuntimeException In the GD path, if the source cannot be read or
     *                           decoded, or the crop cannot be created or written.
     */
    private function cropImage(string $sourcePath, string $destPath, int $x, int $y, int $w, int $h): void
    {
        if (extension_loaded('imagick') && class_exists('\Imagick')) {
            $im = new \Imagick($sourcePath);
            try {
                $im->setImageFormat('png');
                $im->cropImage($w, $h, $x, $y);
                // Reset the virtual canvas so the output has no page offset
                $im->setImagePage(0, 0, 0, 0);
                $im->writeImage($destPath);
            } finally {
                // Release the image even when one of the operations throws
                $im->clear();
                $im->destroy();
            }

            return;
        }

        $srcData = file_get_contents($sourcePath);
        if ($srcData === false) {
            throw new \RuntimeException("Unable to read source image: {$sourcePath}");
        }

        $src = imagecreatefromstring($srcData);
        if ($src === false) {
            throw new \RuntimeException("Unable to decode source image: {$sourcePath}");
        }

        $dst = imagecreatetruecolor($w, $h);
        if ($dst === false) {
            imagedestroy($src);
            throw new \RuntimeException("Unable to allocate {$w}x{$h} crop canvas for: {$sourcePath}");
        }

        $white = imagecolorallocate($dst, 255, 255, 255);
        imagefill($dst, 0, 0, $white);
        imagecopy($dst, $src, 0, 0, $x, $y, $w, $h);
        $written = imagepng($dst, $destPath);
        imagedestroy($src);
        imagedestroy($dst);

        if (!$written) {
            throw new \RuntimeException("Unable to write crop image: {$destPath}");
        }
    }

    /**
     * Crop the source image into a grid of image files and return their paths.
     * This method only writes crop image files; it does not call the Vision API.
     *
     * Recognised $options keys:
     *  - crop_width, crop_height: cell size; when unset the cell size is derived
     *    from the image size, the offsets and the grid dimensions
     *  - x_offset, y_offset: top-left corner of the grid (default 0)
     *  - bottom_offset: pixels excluded from the bottom when deriving cell height
     *  - pad_x, pad_y: inset applied on each side of a cell (default 6)
     *  - row_pad_y: per-row pad_y override, keyed by 1-based row number
     *  - row_extend_bottom: extra pixels added below a row, keyed by 1-based row number
     *  - dynamic: when exactly true, try to detect offsets and cell size from
     *    Tesseract output first; falls back to the static options on failure
     *
     * @param string $imagePath
     * @param int $cols
     * @param int $rows
     * @param array $options See the list above
     * @return CropResult[] Array of CropResult objects; empty when the grid has
     *                      no cells or the image dimensions cannot be read
     * @throws \InvalidArgumentException If the image does not exist.
     */
    public function cropGridImages(string $imagePath, int $cols = 3, int $rows = 10, array $options = []): array
    {
        if (!file_exists($imagePath)) {
            throw new \InvalidArgumentException("Image not found: {$imagePath}");
        }

        // A grid without cells produces no crops; bail out before dividing by
        // $cols / $rows below.
        if ($cols < 1 || $rows < 1) {
            Log::warning('Grid cropping requested with an empty grid', ['cols' => $cols, 'rows' => $rows]);
            return [];
        }

        // If dynamic detection requested, first attempt to infer grid parameters.
        if (($options['dynamic'] ?? false) === true) {
            try {
                $detected = $this->detectGridParametersFromImage($imagePath, $cols, $rows);
                if ($detected) {
                    // Merge detected overrides into options so downstream math uses them.
                    $options = array_merge($options, [
                        'x_offset' => $detected['x_offset'],
                        'y_offset' => $detected['y_offset'],
                        'crop_width' => $detected['crop_width'],
                        'crop_height' => $detected['crop_height'],
                        // reduce pad to avoid clipping serial/name text
                        'pad_x' => $options['pad_x'] ?? 4,
                        'pad_y' => $options['pad_y'] ?? 4,
                    ]);
                    Log::info('Dynamic grid parameters detected', $detected);
                } else {
                    Log::warning('Dynamic grid detection failed; falling back to static options');
                }
            } catch (\Throwable $e) {
                Log::error('Dynamic grid detection threw exception; falling back to static options', ['error' => $e->getMessage()]);
            }
        }

        $opts = array_merge([
            'crop_width' => null,
            'crop_height' => null,
            'x_offset' => 0,
            'y_offset' => 0,
            // Amount of pixels to exclude from the bottom of the image when
            // computing grid height (to avoid footers being included in rows)
            'bottom_offset' => 0,
            'pad_x' => 6,
            'pad_y' => 6,
            // Optional per-row overrides
            // Example: ['row_pad_y' => [10 => 0], 'row_extend_bottom' => [10 => 30]]
            'row_pad_y' => [],
            'row_extend_bottom' => [],
        ], $options);

        $size = getimagesize($imagePath);
        if ($size === false) {
            Log::error('Unable to determine image dimensions for grid cropping.', ['image' => $imagePath]);
            return [];
        }
        $imgW = (int)$size[0];
        $imgH = (int)$size[1];

        // Pixel coordinates are integers from here on
        $xOffset = (int)$opts['x_offset'];
        $yOffset = (int)$opts['y_offset'];
        $padX = (int)$opts['pad_x'];

        $cellW = $opts['crop_width'] ? (int)$opts['crop_width'] : (int)floor(($imgW - $xOffset) / $cols);
        $usableH = max(1, $imgH - $yOffset - max(0, (int)$opts['bottom_offset']));
        $cellH = $opts['crop_height'] ? (int)$opts['crop_height'] : (int)floor($usableH / $rows);

        $tmpDir = storage_path('app/ocr_crops');
        if (!is_dir($tmpDir)) {
            mkdir($tmpDir, 0755, true);
        }

        $results = [];

        for ($r = 0; $r < $rows; $r++) {
            // Row-level settings are keyed by 1-based row number
            $rowNumber = $r + 1;
            $effectivePadY = isset($opts['row_pad_y'][$rowNumber])
                ? (int)$opts['row_pad_y'][$rowNumber]
                : (int)$opts['pad_y'];
            // Optionally extend crop downward for specific rows to include trailing ID lines
            $extendBottom = isset($opts['row_extend_bottom'][$rowNumber])
                ? (int)$opts['row_extend_bottom'][$rowNumber]
                : 0;

            for ($c = 0; $c < $cols; $c++) {
                $x = $xOffset + $c * $cellW;
                $y = $yOffset + $r * $cellH;

                $w = max(1, $cellW - $padX * 2);
                $h = max(1, $cellH - $effectivePadY * 2);
                $cropX = max(0, $x + $padX);
                $cropY = max(0, $y + $effectivePadY);

                if ($extendBottom > 0) {
                    $h += $extendBottom;
                }

                // Clamp to the image; skip cells that fall entirely outside it
                if ($cropX + $w > $imgW) {
                    $w = $imgW - $cropX;
                }
                if ($cropY + $h > $imgH) {
                    $h = $imgH - $cropY;
                }
                if ($w <= 0 || $h <= 0) {
                    continue;
                }

                $tmpPath = $tmpDir . DIRECTORY_SEPARATOR . sprintf('crop_r%d_c%d_%s.png', $rowNumber, $c + 1, uniqid());

                // Shared Imagick/GD implementation instead of a second inline copy
                $this->cropImage($imagePath, $tmpPath, $cropX, $cropY, $w, $h);

                $results[] = new CropResult(
                    row: $rowNumber,
                    col: $c + 1,
                    path: $tmpPath,
                    x: $cropX,
                    y: $cropY,
                    w: $w,
                    h: $h
                );
            }
        }

        return $results;
    }

    /**
     * Attempt to detect grid parameters (top-left offset, box width/height) by analyzing TSV word boxes.
     * Strategy:
     *  - Use Tesseract TSV output (psm 6) to get word bounding boxes
     *  - Identify candidate serial number boxes: words that are purely 1-4 digits with a value of 1..4000
     *  - Group first row serials by y proximity (within 1.5 * the first box's height, at least 15px)
     *  - Derive horizontal spacing (average delta x between consecutive serial boxes)
     *  - Derive vertical spacing using the next row of serial numbers
     *  - Return inferred x_offset, y_offset, crop_width, crop_height
     *
     * @param string $imagePath Page image to analyse.
     * @param int $cols Expected number of columns; the first row must contain at least this many serials.
     * @param int $rows Not used by the detection itself.
     * @return array|null Detected parameters plus diagnostic values, or null when
     *                    the TSV is unavailable or the layout cannot be inferred
     *                    (which includes any grid with fewer than two columns).
     */
    protected function detectGridParametersFromImage(string $imagePath, int $cols, int $rows): ?array
    {
        $tsv = $this->runTesseractTsv($imagePath);
        if (!$tsv || !file_exists($tsv)) {
            return null;
        }

        $lines = file($tsv, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        @unlink($tsv);
        if ($lines === false) {
            // The file vanished or became unreadable between the check and the read
            return null;
        }

        $words = [];
        foreach ($lines as $l) {
            // Skip header line (starts with "level")
            if (str_starts_with($l, 'level')) {
                continue;
            }
            $parts = explode("\t", $l);
            // Expect at least 12 columns; 6-9 are left/top/width/height, 11 is the text
            if (count($parts) < 12) {
                continue;
            }
            $text = trim($parts[11]);
            if ($text === '') {
                continue;
            }
            $words[] = [
                'text' => $text,
                'left' => (int)$parts[6],
                'top' => (int)$parts[7],
                'width' => (int)$parts[8],
                'height' => (int)$parts[9],
            ];
        }
        if (empty($words)) {
            return null;
        }

        // Candidate serial numbers: purely digits, length 1-4, value 1..4000
        $serialBoxes = array_filter($words, function ($w) {
            return preg_match('/^\d{1,4}$/', $w['text']) && (int)$w['text'] > 0 && (int)$w['text'] <= 4000;
        });
        if (count($serialBoxes) < $cols) {
            return null; // not enough hits to infer a row
        }

        // Sort by y then x
        usort($serialBoxes, function ($a, $b) {
            return $a['top'] <=> $b['top'] ?: $a['left'] <=> $b['left'];
        });

        // Determine first row: take first box, then include subsequent boxes whose top is within tolerance
        $firstTop = $serialBoxes[0]['top'];
        $rowTolerance = max(15, (int)round($serialBoxes[0]['height'] * 1.5));
        $firstRow = [];
        foreach ($serialBoxes as $sb) {
            if (abs($sb['top'] - $firstTop) > $rowTolerance) {
                // Boxes are sorted by top, so every later box is out of range too
                break;
            }
            $firstRow[] = $sb;
        }

        // Ensure we have enough columns identified; if not, abort.
        if (count($firstRow) < $cols) {
            return null;
        }

        // Keep only first $cols leftmost of the first row
        usort($firstRow, fn($a, $b) => $a['left'] <=> $b['left']);
        $firstRow = array_slice($firstRow, 0, $cols);

        // Horizontal spacing: deltas between consecutive serial boxes (left positions)
        $deltas = [];
        for ($i = 1; $i < count($firstRow); $i++) {
            $deltas[] = $firstRow[$i]['left'] - $firstRow[$i - 1]['left'];
        }
        if (empty($deltas)) {
            return null;
        }
        $avgDeltaX = (int)round(array_sum($deltas) / count($deltas));

        // Estimate crop width: use avg delta (assumes uniform grid); allow final box same width
        $cropWidth = $avgDeltaX; // will be trimmed by grid code

        // x_offset: leftmost serial minus small padding ($firstRow is sorted by left)
        $xOffset = max(0, $firstRow[0]['left'] - 10);

        // Find second row: the smallest top that lies clearly below the first row
        $lowerTops = array_column(
            array_filter($serialBoxes, fn($b) => $b['top'] > $firstTop + $rowTolerance),
            'top'
        );
        $secondTop = $lowerTops ? min($lowerTops) : null;

        // Vertical spacing
        $deltaY = $secondTop !== null ? ($secondTop - $firstTop) : null;
        if ($deltaY === null || $deltaY <= 0) {
            return null; // cannot determine height
        }
        $cropHeight = $deltaY; // approximate card height

        // y_offset: position of first row serial minus padding and allow for serial box being slightly below true top
        $yOffset = max(0, $firstTop - 20);

        return [
            'x_offset' => $xOffset,
            'y_offset' => $yOffset,
            'crop_width' => $cropWidth,
            'crop_height' => $cropHeight,
            'detected_first_row_top' => $firstTop,
            'detected_second_row_top' => $secondTop,
            'avg_delta_x' => $avgDeltaX,
            'delta_y' => $deltaY,
        ];
    }
}
