<?php

namespace App\Services\Concerns;

use Illuminate\Support\Facades\Log;
use App\Services\OcrService;

trait ParsesVoterBlocks
{
    /**
     * Parse voters from one OCR'd page using layout-aware word bounding boxes.
     *
     * Pipeline: fetch structured words from OcrService for $imagePath, normalize
     * their boxes, crop to the card region, lay a fixed grid over that region and
     * run field extraction on every non-empty grid cell.
     *
     * @param string      $text        Linear OCR text. Not read by this implementation;
     *                                 kept so existing callers keep working.
     * @param string|null $boothNumber Booth number forwarded to field extraction.
     * @param string|null $imagePath   Path of the page image handed to OcrService.
     *
     * @return array List of voter records, one per successfully extracted card;
     *               empty when OCR yields no words.
     */
    public function parseVotersFromOcr(string $text, ?string $boothNumber = null, ?string $imagePath = null): array
    {
        $ocrService = new OcrService();
        $rawWords = $ocrService->extractStructuredData($imagePath);

        if (empty($rawWords)) {
            return [];
        }

        // 1. Preprocess & normalize all bounding boxes
        $normalizedWords = $this->normalizeWords($rawWords);
        $cardWords = $this->filterCardRegion($normalizedWords);

        // 2. Fixed grid: createFallbackGrid() returns a flat list of cells
        //    (each carrying 'row', 'col' and its bounds), not a
        //    ['columns' => ..., 'rows' => ...] structure.
        $grid = $this->createFallbackGrid($cardWords);

        // 3. Segment into card boxes
        $cards = $this->segmentCardsFromGrid($cardWords, $grid);

        // Derive the grid dimensions from the cells themselves; looking up
        // $grid['columns'] / $grid['rows'] on a cell list always reported 0.
        $columnCount = count(array_unique(array_column($grid, 'col')));
        $rowCount = count(array_unique(array_column($grid, 'row')));

        Log::debug('Grid segmentation complete', [
            'total_cards' => count($cards),
            'columns' => $columnCount,
            'rows' => $rowCount
        ]);

        // 4. Extract fields from each card
        $voters = [];
        $serial = 1;
        $skippedCards = 0;

        Log::info('Processing voter cards', [
            'total_cards_detected' => count($cards),
            'grid_columns' => $columnCount,
            'grid_rows' => $rowCount
        ]);

        foreach ($cards as $cardIndex => $cardBox) {
            $voter = $this->extractCardFields($cardBox, $normalizedWords, $serial, $boothNumber);
            if ($voter !== null) {
                $voters[] = $voter;
            } else {
                $skippedCards++;
                Log::warning('Skipped voter card during extraction', [
                    'card_index' => $cardIndex,
                    'expected_serial' => $serial,
                    'words_in_card' => count($cardBox['words'] ?? [])
                ]);
            }
            // The positional serial advances for skipped cards too, so later
            // cards keep their grid position as the fallback serial.
            $serial++;
        }

        // $skippedCards > 0 implies count($cards) > 0, so the division is safe.
        if ($skippedCards > 0) {
            Log::warning('Some voter cards were skipped', [
                'total_cards' => count($cards),
                'successful_extractions' => count($voters),
                'skipped_cards' => $skippedCards,
                'success_rate' => round((count($voters) / count($cards)) * 100, 1) . '%'
            ]);
        }

        return $voters;
    }

    /**
     * Normalize raw OCR words into a uniform array shape with integer boxes.
     *
     * Each input word may be an associative array, an ArrayAccess object, or a
     * plain object exposing left/top/width/height/text properties. Missing
     * coordinates default to 0 and missing text to ''.
     *
     * @param array $words Raw word entries.
     *
     * @return array List of arrays with keys text, left, top, width, height,
     *               right, bottom, center_x, center_y, cx, cy
     *               (cx/cy duplicate center_x/center_y).
     */
    protected function normalizeWords(array $words): array
    {
        // Read a field from either representation. Array-style access is only
        // attempted when it is legal: isset($obj['key']) on a plain object
        // throws "Cannot use object of type ... as array".
        $read = static function ($word, string $key, $default) {
            if ((is_array($word) || $word instanceof \ArrayAccess) && isset($word[$key])) {
                return $word[$key];
            }
            if (is_object($word)) {
                return $word->{$key} ?? $default;
            }
            return $default;
        };

        $normalized = [];
        foreach ($words as $w) {
            $left = (int) $read($w, 'left', 0);
            $top = (int) $read($w, 'top', 0);
            $width = (int) $read($w, 'width', 0);
            $height = (int) $read($w, 'height', 0);
            $text = trim((string) $read($w, 'text', ''));

            // Integer centers (half-size truncated toward zero)
            $cx = $left + (int)($width / 2);
            $cy = $top + (int)($height / 2);

            $normalized[] = [
                'text' => $text,
                'left' => $left,
                'top' => $top,
                'width' => $width,
                'height' => $height,
                'right' => $left + $width,
                'bottom' => $top + $height,
                'center_x' => $cx,
                'center_y' => $cy,
                'cx' => $cx,
                'cy' => $cy,
            ];
        }

        return $normalized;
    }

    /**
     * Crop the word list to the vertical band that contains the voter cards.
     *
     * The band starts 40px above the highest 1-2 digit number found below
     * y=120 (clamped at 0). It ends 40px below the lowest card keyword
     * (Gender/Male/Female/Available/House/Age/DELETED), or 220px below the
     * lowest numeric anchor when no keyword is present. When no numeric anchor
     * exists the input is returned untouched.
     *
     * @param array $words Normalized words (need text, top, bottom).
     *
     * @return array Words overlapping the band, re-indexed from 0.
     */
    protected function filterCardRegion(array $words): array
    {
        if (empty($words)) {
            return $words;
        }

        // Single sweep collecting the extents of both kinds of marker.
        $anchorTops = [];
        $anchorBottoms = [];
        $keywordBottoms = [];
        foreach ($words as $candidate) {
            $isNumericAnchor = preg_match('/^\d{1,2}$/', trim($candidate['text']))
                && $candidate['top'] > 120;
            if ($isNumericAnchor) {
                $anchorTops[] = $candidate['top'];
                $anchorBottoms[] = $candidate['bottom'];
            }

            if (preg_match('/(Gender|Male|Female|Available|House|Age|DELETED)/i', $candidate['text'])) {
                $keywordBottoms[] = $candidate['bottom'];
            }
        }

        if ($anchorTops === []) {
            return $words;
        }

        $bandTop = max(0, min($anchorTops) - 40);
        if ($keywordBottoms !== []) {
            $bandBottom = max($keywordBottoms) + 40;
        } else {
            $bandBottom = max($anchorBottoms) + 220;
        }

        $kept = [];
        foreach ($words as $candidate) {
            if ($candidate['bottom'] < $bandTop || $candidate['top'] > $bandBottom) {
                continue;
            }
            $kept[] = $candidate;
        }

        return $kept;
    }

    /**
     * Detect the card grid, preferring serial-number anchors over gap analysis.
     *
     * The anchor-based grid is used only when at least 12 anchors were found
     * and it produced both columns and rows; every other case falls through to
     * detectGridLayoutUsingGaps().
     *
     * @param array $words Normalized words.
     *
     * @return array Grid description with 'columns' and 'rows' keys.
     */
    protected function detectGridLayout(array $words): array
    {
        $serialAnchors = $this->findSerialAnchors($words);

        Log::debug('Serial anchors found', [
            'count' => count($serialAnchors),
            'serials' => array_column($serialAnchors, 'serial')
        ]);

        if (count($serialAnchors) < 12) {
            return $this->detectGridLayoutUsingGaps($words);
        }

        $anchorGrid = $this->buildGridFromSerialAnchors($serialAnchors, $words);
        $isUsable = !empty($anchorGrid['columns']) && !empty($anchorGrid['rows']);

        return $isUsable ? $anchorGrid : $this->detectGridLayoutUsingGaps($words);
    }

    /**
     * Find words that look like voter serial numbers.
     *
     * A word qualifies when its trimmed text is 1-3 digits with a value of at
     * least 1, it is not immediately preceded by a word containing "Age"
     * (case-insensitive), its top is at or below y=120 (header skip) and its
     * height is at most 60px.
     *
     * Implemented as a single pass that remembers the previous word, instead of
     * building an index list and scanning it with in_array() for every word.
     * That keeps the work linear and does not depend on 0..n-1 array keys.
     *
     * @param array $words Normalized words in reading order (need text, top, height).
     *
     * @return array Matching words, each extended with an integer 'serial' key.
     */
    protected function findSerialAnchors(array $words): array
    {
        $anchors = [];
        $previousText = null;

        foreach ($words as $word) {
            // Decide whether this word follows an "Age" label before moving on.
            $followsAgeLabel = $previousText !== null
                && preg_match('/Age/i', $previousText) === 1;
            $previousText = (string) $word['text'];

            $text = trim($word['text']);
            if (!preg_match('/^\d{1,3}$/', $text)) {
                continue;
            }

            // A number directly after "Age" is an age value, not a serial.
            if ($followsAgeLabel) {
                continue;
            }

            $serial = (int)$text;

            // Rejects "0"/"00"/"000"; the upper bound is kept as a safety net
            // even though three digits cannot exceed 999.
            if ($serial < 1 || $serial > 999) {
                continue;
            }

            // Skip the page header area
            if ($word['top'] < 120) {
                continue;
            }

            // Oversized glyphs are not serial numbers
            if ($word['height'] > 60) {
                continue;
            }

            $anchors[] = array_merge($word, [
                'serial' => $serial,
            ]);
        }

        return $anchors;
    }

    /**
     * Build a grid of row bands and three column bands from serial-number anchors.
     *
     * Returns ['columns' => [], 'rows' => [], 'page_bounds' => []] when the grid
     * cannot be built: no anchors, at most one row group, or fewer than three
     * non-empty x clusters.
     *
     * @param array $anchors Anchor words (need top, bottom, left, right, height, center_x).
     * @param array $words   All words; only read as a fallback source for the page's
     *                       bottom edge.
     *
     * @return array ['columns' => list of ['min_x','max_x'],
     *                'rows' => list of ['min_y','max_y'] (at most 10),
     *                'page_bounds' => ['min_x','max_x','min_y','max_y']]
     */
    protected function buildGridFromSerialAnchors(array $anchors, array $words): array
    {
        if (empty($anchors)) {
            return ['columns' => [], 'rows' => [], 'page_bounds' => []];
        }

        // Order anchors top-to-bottom; anchors whose tops differ by less than
        // 20px are treated as being on the same line and ordered left-to-right.
        usort($anchors, function ($a, $b) {
            if (abs($a['top'] - $b['top']) < 20) {
                return $a['left'] <=> $b['left'];
            }
            return $a['top'] <=> $b['top'];
        });

        // A new row starts when an anchor sits more than this far below the
        // current row: 6x the median anchor height, but never less than 120px.
        $anchorHeights = array_column($anchors, 'height');
        $medianAnchorHeight = max(1, (int)$this->median($anchorHeights));
        $rowThreshold = max((int)($medianAnchorHeight * 6), 120);

        // Group anchors into rows. Note that 'max_top' actually tracks the
        // largest anchor *bottom* seen in the row, despite its name.
        $rows = [];
        foreach ($anchors as $anchor) {
            if (empty($rows)) {
                $rows[] = [
                    'anchors' => [$anchor],
                    'min_top' => $anchor['top'],
                    'max_top' => $anchor['bottom'],
                ];
                continue;
            }

            $currentIndex = count($rows) - 1;
            if ($anchor['top'] > $rows[$currentIndex]['max_top'] + $rowThreshold) {
                $rows[] = [
                    'anchors' => [$anchor],
                    'min_top' => $anchor['top'],
                    'max_top' => $anchor['bottom'],
                ];
            } else {
                $rows[$currentIndex]['anchors'][] = $anchor;
                $rows[$currentIndex]['min_top'] = min($rows[$currentIndex]['min_top'], $anchor['top']);
                $rows[$currentIndex]['max_top'] = max($rows[$currentIndex]['max_top'], $anchor['bottom']);
            }
        }

        // A single row group gives no row spacing to work with.
        if (count($rows) <= 1) {
            return ['columns' => [], 'rows' => [], 'page_bounds' => []];
        }

        usort($rows, fn($a, $b) => $a['min_top'] <=> $b['min_top']);

        // Derive padding and band height from the median distance between
        // consecutive row tops (260 is the default if no distances exist).
        $rowTops = array_column($rows, 'min_top');
        $rowSpacingDiffs = [];
        for ($i = 0; $i < count($rowTops) - 1; $i++) {
            $rowSpacingDiffs[] = $rowTops[$i + 1] - $rowTops[$i];
        }
        $medianRowSpacing = !empty($rowSpacingDiffs) ? (int)$this->median($rowSpacingDiffs) : 260;
        $rowPadding = max(20, (int)($medianRowSpacing * 0.25));
        $rowHeight = max(180, (int)($medianRowSpacing * 0.95));

        // Each band starts $rowPadding above the row's first anchor top and is
        // $rowHeight tall, but may not extend past the midpoint to the next
        // row's top plus $rowPadding.
        $finalRows = [];
        $rowCount = count($rows);
        foreach ($rows as $index => $row) {
            $minY = max(0, $row['min_top'] - $rowPadding);
            $maxY = $minY + $rowHeight;

            if ($index < $rowCount - 1) {
                $nextTop = $rows[$index + 1]['min_top'];
                $mid = (int)(($row['min_top'] + $nextTop) / 2);
                $maxY = min($maxY, $mid + $rowPadding);
            }

            $finalRows[] = [
                'min_y' => $minY,
                'max_y' => $maxY,
            ];
        }

        // Keep only the ten topmost bands.
        usort($finalRows, fn($a, $b) => $a['min_y'] <=> $b['min_y']);
        $finalRows = array_slice($finalRows, 0, 10);

        // Cluster anchor x-centers into three groups. $anchors was re-indexed
        // by usort() above, so assignment index $i lines up with anchor $i.
        $allCenters = array_column($anchors, 'center_x');
        $assignments = $this->kMeansCluster($allCenters, 3);
        foreach ($anchors as $i => $anchor) {
            $anchors[$i]['column'] = $assignments[$i] ?? 0;
        }

        $clusters = [];
        foreach ($anchors as $anchor) {
            $clusters[$anchor['column']][] = $anchor;
        }

        if (count($clusters) < 3) {
            return ['columns' => [], 'rows' => [], 'page_bounds' => []];
        }

        // Represent each cluster by the median x-center of its anchors.
        $clusterInfo = [];
        foreach ($clusters as $index => $clusterAnchors) {
            if (empty($clusterAnchors)) {
                continue;
            }
            $clusterInfo[] = [
                'index' => $index,
                'median' => (int)$this->median(array_column($clusterAnchors, 'center_x')),
            ];
        }

        if (count($clusterInfo) < 3) {
            return ['columns' => [], 'rows' => [], 'page_bounds' => []];
        }

        usort($clusterInfo, fn($a, $b) => $a['median'] <=> $b['median']);

        $overallMinX = min(array_column($anchors, 'left'));
        $overallMaxX = max(array_column($anchors, 'right'));
        $colPadding = 50;

        // Column edges sit halfway between neighbouring cluster medians; the
        // outermost edges are the anchor extents widened by $colPadding.
        $columns = [];
        foreach ($clusterInfo as $idx => $info) {
            $prevMedian = $clusterInfo[$idx - 1]['median'] ?? null;
            $nextMedian = $clusterInfo[$idx + 1]['median'] ?? null;

            $minX = $prevMedian === null
                ? $overallMinX - $colPadding
                : (int)(($prevMedian + $info['median']) / 2);
            $maxX = $nextMedian === null
                ? $overallMaxX + $colPadding
                : (int)(($info['median'] + $nextMedian) / 2);

            $columns[] = [
                'min_x' => $minX,
                'max_x' => $maxX,
            ];
        }

        usort($columns, fn($a, $b) => $a['min_x'] <=> $b['min_x']);

        // Vertical page extent comes from the row bands; the word-based
        // fallback only applies if no bands were produced.
        $pageMinY = $finalRows[0]['min_y'] ?? 0;
        $pageMaxY = $finalRows ? max(array_column($finalRows, 'max_y')) : max(array_column($words, 'bottom'));

        return [
            'columns' => $columns,
            'rows' => $finalRows,
            'page_bounds' => [
                'min_x' => $overallMinX - $colPadding,
                'max_x' => $overallMaxX + $colPadding,
                'min_y' => $pageMinY,
                'max_y' => $pageMaxY + 40,
            ],
        ];
    }

    /**
     * Detect a 3-column grid from whitespace gaps between word centers.
     *
     * Columns are split at the two largest gaps between sorted x-centers (or
     * into equal thirds when fewer than two gaps exist). Rows are formed by
     * grouping sorted y-centers whose neighbours lie within a threshold derived
     * from the median word height, then merging bands that touch or overlap.
     *
     * @param array $words Normalized words (need left, top, width, height, cx, cy).
     *
     * @return array ['columns' => list of ['min_x','max_x'],
     *                'rows' => list of ['min_y','max_y'],
     *                'page_bounds' => ['min_x','max_x','min_y','max_y']].
     *               NOTE(review): the empty-input return omits 'page_bounds'.
     */
    protected function detectGridLayoutUsingGaps(array $words): array
    {
        // Expecting 3 vertical columns.
        if (empty($words)) {
            return ['columns' => [], 'rows' => []];
        }

        // compute overall page bounds and collect centers/heights in one pass
        $minX = PHP_INT_MAX; $maxX = 0; $minY = PHP_INT_MAX; $maxY = 0;
        $xCenters = []; $yCenters = []; $heights = [];
        foreach ($words as $w) {
            $minX = min($minX, $w['left']);
            $maxX = max($maxX, $w['left'] + $w['width']);
            $minY = min($minY, $w['top']);
            $maxY = max($maxY, $w['top'] + $w['height']);
            $xCenters[] = $w['cx'];
            $yCenters[] = $w['cy'];
            // clamp to 1 so zero-height words do not drag the median to 0
            $heights[] = max(1, $w['height']);
        }

        sort($xCenters);
        sort($yCenters);

        // find two largest gaps in sorted x centers -> column separators
        // ($xGaps[$i] is the gap between $xCenters[$i] and $xCenters[$i + 1])
        $xGaps = [];
        for ($i = 1; $i < count($xCenters); $i++) {
            $xGaps[] = $xCenters[$i] - $xCenters[$i - 1];
        }

        // if there are fewer than 2 gaps, fallback to equal thirds
        if (count($xGaps) < 2) {
            $pageWidth = max(1, $maxX - $minX);
            $colW = (int)($pageWidth / 3);
            $columns = [
                ['min_x' => $minX, 'max_x' => $minX + $colW],
                ['min_x' => $minX + $colW + 1, 'max_x' => $minX + ($colW * 2)],
                ['min_x' => $minX + ($colW * 2) + 1, 'max_x' => $maxX],
            ];
        } else {
            // find indices of two largest gaps (sorted descending by gap size)
            $gapsWithIndex = [];
            for ($i = 0; $i < count($xGaps); $i++) {
                $gapsWithIndex[] = ['gap' => $xGaps[$i], 'idx' => $i];
            }
            usort($gapsWithIndex, function($a, $b) { return $b['gap'] <=> $a['gap']; });
            $first = $gapsWithIndex[0]['idx'];
            $second = $gapsWithIndex[1]['idx'];
            // ensure first < second so the separators run left to right
            if ($second < $first) { [$first, $second] = [$second, $first]; }

            // separators at midpoints between the centers on either side of each gap
            $sep1 = (int)(($xCenters[$first] + $xCenters[$first + 1]) / 2);
            $sep2 = (int)(($xCenters[$second] + $xCenters[$second + 1]) / 2);

            $columns = [
                ['min_x' => $minX, 'max_x' => $sep1],
                ['min_x' => $sep1 + 1, 'max_x' => $sep2],
                ['min_x' => $sep2 + 1, 'max_x' => $maxX],
            ];
        }

        // Row detection: group yCenters by vertical proximity using median height
        // (threshold is 2.5x the median height, at least 20px)
        $medianH = (int)$this->median($heights);
        $rowGapThreshold = max( (int)($medianH * 2.5), 20 ); // tunable

        $rows = [];
        $currentGroup = [];
        $prevY = null;
        foreach ($yCenters as $yc) {
            if ($prevY === null) {
                $currentGroup[] = $yc;
                $prevY = $yc;
                continue;
            }
            // compare against the immediately preceding center, not the group start
            if (abs($yc - $prevY) <= $rowGapThreshold) {
                $currentGroup[] = $yc;
            } else {
                // finalize current group, padded by half the median height on each side
                $rows[] = [ 'min_y' => min($currentGroup) - (int)($medianH/2), 'max_y' => max($currentGroup) + (int)($medianH/2) ];
                $currentGroup = [$yc];
            }
            $prevY = $yc;
        }
        if (!empty($currentGroup)) {
            $rows[] = [ 'min_y' => min($currentGroup) - (int)($medianH/2), 'max_y' => max($currentGroup) + (int)($medianH/2) ];
        }

        // Merge a row into the previous one when it starts within half a median
        // height of the previous row's bottom edge
        $mergedRows = [];
        foreach ($rows as $r) {
            if (empty($mergedRows)) { $mergedRows[] = $r; continue; }
            $last = end($mergedRows);
            if ($r['min_y'] <= $last['max_y'] + (int)($medianH * 0.5)) {
                // merge
                $mergedRows[count($mergedRows)-1] = [
                    'min_y' => min($last['min_y'], $r['min_y']),
                    'max_y' => max($last['max_y'], $r['max_y'])
                ];
            } else {
                $mergedRows[] = $r;
            }
        }

        return [
            'columns' => $columns,
            'rows' => $mergedRows,
            'page_bounds' => ['min_x' => $minX, 'max_x' => $maxX, 'min_y' => $minY, 'max_y' => $maxY],
        ];
    }

    /**
     * Collect, for every grid cell, the words whose boxes overlap that cell.
     *
     * $grid is a flat list of cells, each providing row, col, min_x, max_x,
     * min_y and max_y. A word is attached to a cell when its box intersects the
     * cell on both axes (edges inclusive), so a word straddling a boundary can
     * appear in more than one card. Cells without any word are dropped.
     *
     * @param array $words Normalized words (need left, right, top, bottom).
     * @param array $grid  List of grid cells.
     *
     * @return array List of cards: the cell's row/col/bounds plus a 'words' list.
     */
    protected function segmentCardsFromGrid(array $words, array $grid): array
    {
        if (empty($grid) || empty($words)) {
            return [];
        }

        $cards = [];
        foreach ($grid as $cell) {
            [$cellLeft, $cellRight] = [$cell['min_x'], $cell['max_x']];
            [$cellTop, $cellBottom] = [$cell['min_y'], $cell['max_y']];

            $members = [];
            foreach ($words as $candidate) {
                $overlapsX = $candidate['right'] >= $cellLeft && $candidate['left'] <= $cellRight;
                $overlapsY = $candidate['bottom'] >= $cellTop && $candidate['top'] <= $cellBottom;
                if ($overlapsX && $overlapsY) {
                    $members[] = $candidate;
                }
            }

            if ($members === []) {
                continue;
            }

            $cards[] = [
                'row' => $cell['row'],
                'col' => $cell['col'],
                'min_x' => $cellLeft,
                'max_x' => $cellRight,
                'min_y' => $cellTop,
                'max_y' => $cellBottom,
                'words' => $members
            ];
        }

        Log::info('Card segmentation from fixed grid', [
            'grid_cells' => count($grid),
            'cards_with_words' => count($cards)
        ]);

        return $cards;
    }

    /**
     * Lay a fixed 3x10 grid over the words' bounding extent and bucket words by center.
     *
     * Cells are half-open: a word belongs to a cell when its center is at or
     * past the cell's left/top edge and strictly before its right/bottom edge.
     * Only the last column and last row are closed on the far side. This puts
     * every word in exactly one cell; with both edges inclusive, a word centred
     * exactly on a boundary was duplicated into the neighbouring cells.
     *
     * @param array $words Normalized words (need left, right, top, bottom,
     *                     center_x, center_y).
     *
     * @return array List of non-empty cells in row-major order, each with row,
     *               col, min_x, max_x, min_y, max_y, words and
     *               'fallback_grid' => true. Empty when $words is empty.
     */
    protected function createFallbackGrid(array $words): array
    {
        if (empty($words)) {
            return [];
        }

        // Calculate page bounds
        $minX = min(array_column($words, 'left'));
        $maxX = max(array_column($words, 'right'));
        $minY = min(array_column($words, 'top'));
        $maxY = max(array_column($words, 'bottom'));

        // Create 3x10 grid (standard voter card layout)
        $cols = 3;
        $rows = 10;
        $colWidth = ($maxX - $minX) / $cols;
        $rowHeight = ($maxY - $minY) / $rows;

        $cards = [];
        for ($row = 0; $row < $rows; $row++) {
            $isLastRow = $row === $rows - 1;
            $cellMinY = $minY + ($row * $rowHeight);
            $cellMaxY = $minY + (($row + 1) * $rowHeight);

            for ($col = 0; $col < $cols; $col++) {
                $isLastCol = $col === $cols - 1;
                $cellMinX = $minX + ($col * $colWidth);
                $cellMaxX = $minX + (($col + 1) * $colWidth);

                $cellWords = array_filter(
                    $words,
                    static function ($w) use ($cellMinX, $cellMaxX, $cellMinY, $cellMaxY, $isLastCol, $isLastRow) {
                        // The last column/row has no upper check so a center on
                        // the far page edge is never lost to float rounding.
                        $inColumn = $w['center_x'] >= $cellMinX
                            && ($isLastCol || $w['center_x'] < $cellMaxX);
                        $inRow = $w['center_y'] >= $cellMinY
                            && ($isLastRow || $w['center_y'] < $cellMaxY);

                        return $inColumn && $inRow;
                    }
                );

                if (!empty($cellWords)) {
                    $cards[] = [
                        'row' => $row,
                        'col' => $col,
                        'min_x' => $cellMinX,
                        'max_x' => $cellMaxX,
                        'min_y' => $cellMinY,
                        'max_y' => $cellMaxY,
                        'words' => array_values($cellWords),
                        'fallback_grid' => true
                    ];
                }
            }
        }

        Log::info('Created fallback grid', [
            'cards_detected' => count($cards),
            'grid_size' => "{$cols}x{$rows}"
        ]);

        return $cards;
    }

    /**
     * Median of a list of numbers.
     *
     * Returns 0 for an empty list, the middle element for an odd count, and
     * the float mean of the two middle elements for an even count.
     */
    private function median(array $values)
    {
        $size = count($values);
        if ($size === 0) {
            return 0;
        }

        sort($values);
        $lower = intdiv($size - 1, 2);

        return ($size % 2 === 1)
            ? $values[$lower]
            : ($values[$lower] + $values[$lower + 1]) / 2.0;
    }

    /**
     * Project each word down to its center point.
     *
     * @param array $words Words carrying 'cx' and 'cy'.
     *
     * @return array List of ['cx' => ..., 'cy' => ...] in input order, indexed from 0.
     */
    private function computeCenters(array $words): array
    {
        return array_map(
            static fn($word) => ['cx' => $word['cx'], 'cy' => $word['cy']],
            array_values($words)
        );
    }

    /**
     * Placeholder for grid-to-card segmentation.
     *
     * Not implemented yet: $grid is ignored and an empty list is always returned.
     */
    protected function segmentIntoVoterCards(array $grid): array
    {
        // TODO: return bounding boxes for each voter card
        $cardBoxes = [];

        return $cardBoxes;
    }

    /**
     * Turn one segmented card into a normalized voter record.
     *
     * The serial printed on the card (via extractSerialFromCard) wins over the
     * positional $serial. Field extraction is delegated to extractVoterFields()
     * and the result is mapped through normalizeVoterRecord().
     *
     * @param array       $cardBox     Card cell; only its 'words' list is read.
     * @param array       $words       All page words, forwarded to extractVoterFields().
     * @param int         $serial      Positional serial used when none is found on the card.
     * @param string|null $boothNumber Booth number forwarded to extraction and mapping.
     *
     * @return array|null Normalized record, or null when the card has no words
     *                    or extractVoterFields() returns null.
     */
    protected function extractCardFields(array $cardBox, array $words, int $serial, ?string $boothNumber): ?array
    {
        // An empty card box has no 'words' either, so one check covers both.
        if (empty($cardBox['words'])) {
            return null;
        }

        $cardWords = $cardBox['words'];

        // Prefer the serial printed in the card's top-left box.
        $extractedSerial = $this->extractSerialFromCard($cardWords);
        $actualSerial = $extractedSerial ?? $serial;

        Log::debug('Processing voter card', [
            'grid_serial' => $serial,
            'extracted_serial' => $extractedSerial,
            'using_serial' => $actualSerial
        ]);

        // extractVoterFields expects a minimal card structure holding the words.
        $voter = $this->extractVoterFields(['words' => $cardWords], $actualSerial, $boothNumber, $words);

        return $voter === null
            ? null
            : $this->normalizeVoterRecord($voter, $boothNumber);
    }
    
    /**
     * Read the serial number printed in the top-left corner of a card.
     *
     * Only words starting within the top 25% of the card's height and the left
     * 20% of its width are considered. The first such word (in input order)
     * whose trimmed text is a 1-2 digit number of at least 1 is returned.
     *
     * NOTE(review): three-digit serials are never matched here, so callers fall
     * back to their positional serial for them — confirm this is intended.
     *
     * @param array $words Words of a single card (need text, top, left, bottom, right).
     *
     * @return int|null The serial, or null when none is found.
     */
    protected function extractSerialFromCard(array $words): ?int
    {
        if (empty($words)) {
            return null;
        }

        // Card extent, taken from the words themselves
        $cardTop = min(array_column($words, 'top'));
        $cardLeft = min(array_column($words, 'left'));
        $cardBottom = max(array_column($words, 'bottom'));
        $cardRight = max(array_column($words, 'right'));

        $topLimit = $cardTop + (($cardBottom - $cardTop) * 0.25);
        $leftLimit = $cardLeft + (($cardRight - $cardLeft) * 0.20);

        foreach ($words as $candidate) {
            // Outside the top-left serial box
            if ($candidate['top'] > $topLimit || $candidate['left'] > $leftLimit) {
                continue;
            }

            if (!preg_match('/^(\d{1,2})$/', trim($candidate['text']), $digits)) {
                continue;
            }

            $value = (int)$digits[1];
            if ($value < 1 || $value > 100) {
                continue;
            }

            Log::debug('Extracted serial from card', ['serial' => $value, 'text' => $candidate['text']]);

            return $value;
        }

        return null;
    }

    /**
     * Map the fields produced by extractVoterFields onto the Voter model's column set.
     *
     * Every output key is always present. The serial is taken from 'serial_no',
     * then 'serial_number'; missing values become null, except is_head and
     * is_deleted which default to 0.
     *
     * @param array       $voter       Raw extracted fields.
     * @param string|null $boothNumber Used when the record carries no booth_number.
     *
     * @return array The mapped record.
     */
    private function normalizeVoterRecord(array $voter, ?string $boothNumber): array
    {
        // First non-null of the two accepted serial keys, cast to int
        $rawSerial = $voter['serial_no'] ?? $voter['serial_number'] ?? null;
        $serialNumber = $rawSerial === null ? null : (int)$rawSerial;

        $relationType = null;
        if (isset($voter['relation_type'])) {
            $relationType = ucfirst(strtolower($voter['relation_type']));
        }

        $age = null;
        if (isset($voter['age'])) {
            $age = (int)$voter['age'];
        }

        $mapped = [
            'serial_no'        => $serialNumber,
            'serial_number'    => $serialNumber,
            'ration_card_id'   => $voter['ration_card_id'] ?? null,
            'voter_id_number'  => $voter['voter_id_number'] ?? null,
            'name'             => $voter['name'] ?? null,
            'relation_type'    => $relationType,
            'relation_name'    => $voter['relation_name'] ?? null,
            'house_number'     => $voter['house_number'] ?? null,
            'age'              => $age,
            'gender'           => $this->normalizeGender($voter['gender'] ?? null),
            'year_of_birth'    => $voter['year_of_birth'] ?? null,
            'is_head'          => $voter['is_head'] ?? 0,
            'booth_id'         => $voter['booth_id'] ?? null,
            'booth_number'     => $voter['booth_number'] ?? $boothNumber,
            'mobile_number'    => $voter['mobile_number'] ?? null,
            'aadhar_number'    => $voter['aadhar_number'] ?? null,
            'street_id'        => $voter['street_id'] ?? null,
            'street_name'      => $voter['street_name'] ?? null,
            'is_deleted'       => empty($voter['is_deleted']) ? 0 : 1,
        ];

        Log::debug('Normalized voter mapping', ['mapped' => $mapped]);

        // No required-field validation happens here; the mapping is returned as-is.
        return $mapped;
    }

    /**
     * Run Tesseract on an image and return its TSV output as a list of lines.
     *
     * The TSV is written next to the image as "<basename>_tsv.tsv", read back
     * and deleted. The file is also removed when Tesseract reports failure but
     * left output behind, and an unreadable file is treated as a failure
     * instead of being passed on as data.
     *
     * @param string|null $imagePath Image to process; falls back to $this->pagePath.
     *
     * @return array TSV lines (header first), or [] when no path is available,
     *               Tesseract fails, the output cannot be read, or an
     *               exception is thrown.
     */
    protected function extractTSVData(?string $imagePath = null): array
    {
        try {
            // Use provided path or try to get from class property
            $targetPath = $imagePath ?? ($this->pagePath ?? null);

            if (!$targetPath) {
                Log::warning('No image path available for TSV extraction');
                return [];
            }

            // Tesseract appends ".tsv" to the output base itself
            $baseDir = dirname($targetPath);
            $baseName = pathinfo($targetPath, PATHINFO_FILENAME);
            $tsvOutputBase = $baseDir . '/' . $baseName . '_tsv';
            $tsvPath = $tsvOutputBase . '.tsv';

            // Run Tesseract with TSV output to get bounding boxes
            $command = sprintf(
                'tesseract %s %s --psm 6 tsv',
                escapeshellarg($targetPath),
                escapeshellarg($tsvOutputBase)
            );

            exec($command, $output, $returnCode);

            $tsvExists = file_exists($tsvPath);
            if ($returnCode !== 0 || !$tsvExists) {
                Log::warning('TSV generation failed', [
                    'command' => $command,
                    'return_code' => $returnCode,
                    'expected_tsv_path' => $tsvPath,
                    'tsv_exists' => $tsvExists
                ]);

                // Do not leave partial or stale output behind
                if ($tsvExists) {
                    unlink($tsvPath);
                }

                return [];
            }

            $tsvContent = file_get_contents($tsvPath);
            unlink($tsvPath); // Clean up temporary file

            if ($tsvContent === false) {
                Log::warning('TSV output could not be read', ['expected_tsv_path' => $tsvPath]);
                return [];
            }

            return explode("\n", trim($tsvContent));

        } catch (\Exception $e) {
            Log::error('TSV extraction error', ['error' => $e->getMessage()]);
            return [];
        }
    }

    /**
     * Parse Tesseract TSV lines into word arrays with bounding boxes.
     *
     * Line 0 is treated as the header. A row is kept when it has the 12
     * expected columns (level, page_num, block_num, par_num, line_num,
     * word_num, left, top, width, height, conf, text), non-empty text and a
     * confidence of at least 30.
     *
     * Rows are split on tabs with explode() because the TSV is not quoted;
     * a CSV parser would treat a leading '"' in a word as an enclosure. Text is
     * compared against '' rather than tested with empty(), which would also
     * discard the word "0".
     *
     * @param array $tsvLines Raw TSV lines, header first.
     *
     * @return array List of words with the TSV columns as ints plus text,
     *               right, bottom, center_x and center_y.
     */
    protected function parseTSV(array $tsvLines): array
    {
        $words = [];

        foreach ($tsvLines as $lineIndex => $line) {
            // Skip header line
            if ($lineIndex === 0) {
                continue;
            }

            $line = trim($line);
            if ($line === '') {
                continue;
            }

            // Limit of 12 keeps everything after the 11th tab as the text field
            $columns = explode("\t", $line, 12);
            if (count($columns) < 12) {
                continue;
            }

            $text = trim($columns[11]);
            if ($text === '' || (int)$columns[10] < 30) { // Skip blank and low confidence words
                continue;
            }

            $left = (int)$columns[6];
            $top = (int)$columns[7];
            $width = (int)$columns[8];
            $height = (int)$columns[9];

            $words[] = [
                'level' => (int)$columns[0],
                'page_num' => (int)$columns[1],
                'block_num' => (int)$columns[2],
                'par_num' => (int)$columns[3],
                'line_num' => (int)$columns[4],
                'word_num' => (int)$columns[5],
                'left' => $left,
                'top' => $top,
                'width' => $width,
                'height' => $height,
                'conf' => (int)$columns[10],
                'text' => $text,
                'right' => $left + $width,
                'bottom' => $top + $height,
                'center_x' => $left + ($width / 2),
                'center_y' => $top + ($height / 2)
            ];
        }

        return $words;
    }

    /**
     * Split words into $numColumns groups by clustering their center_x values.
     *
     * Cluster membership comes from kMeansCluster(); the resulting groups are
     * then ordered by their mean 'left' value. Empty groups compare as equal
     * to every other group.
     *
     * @param array $words      Words carrying 'center_x' and 'left', indexed from 0.
     * @param int   $numColumns Number of clusters to form.
     *
     * @return array List of word lists, one per column.
     */
    protected function clusterColumns(array $words, int $numColumns = 3): array
    {
        if (empty($words)) {
            return [];
        }

        $assignments = $this->kMeansCluster(array_column($words, 'center_x'), $numColumns);

        // One bucket per cluster, filled in input order
        $buckets = array_fill(0, $numColumns, []);
        foreach ($words as $position => $entry) {
            $buckets[$assignments[$position]][] = $entry;
        }

        $meanLeft = static fn(array $bucket) => array_sum(array_column($bucket, 'left')) / count($bucket);

        // Left-to-right by average left edge
        usort($buckets, static function ($first, $second) use ($meanLeft) {
            if (empty($first) || empty($second)) {
                return 0;
            }

            return $meanLeft($first) <=> $meanLeft($second);
        });

        return $buckets;
    }

    /**
     * One-dimensional k-means over a list of x coordinates.
     *
     * Centroids start evenly spaced between the minimum and maximum value and
     * are refined for at most 10 iterations, stopping early once assignments
     * stop changing.
     *
     * Special cases: an empty input returns []; $k <= 1 puts every point in
     * cluster 0 (evenly spacing a single centroid would divide by $k - 1 = 0);
     * with no more points than clusters each point gets its own cluster.
     *
     * @param array $xCoords List of numeric coordinates.
     * @param int   $k       Number of clusters.
     *
     * @return array Cluster index for each coordinate, in input order.
     */
    protected function kMeansCluster(array $xCoords, int $k): array
    {
        if (empty($xCoords)) {
            return [];
        }

        $n = count($xCoords);

        // Nothing to separate with zero or one cluster
        if ($k <= 1) {
            return array_fill(0, $n, 0);
        }

        if ($n <= $k) {
            return range(0, $n - 1);
        }

        // Initialize centroids evenly across the value range
        $minX = min($xCoords);
        $maxX = max($xCoords);
        $centroids = [];
        for ($i = 0; $i < $k; $i++) {
            $centroids[$i] = $minX + ($i * ($maxX - $minX) / ($k - 1));
        }

        $assignments = array_fill(0, $n, 0);
        $maxIterations = 10;

        for ($iter = 0; $iter < $maxIterations; $iter++) {
            $newAssignments = [];

            // Assign each point to nearest centroid (lowest index wins ties)
            foreach ($xCoords as $i => $x) {
                $bestDist = PHP_FLOAT_MAX;
                $bestCluster = 0;

                for ($j = 0; $j < $k; $j++) {
                    $dist = abs($x - $centroids[$j]);
                    if ($dist < $bestDist) {
                        $bestDist = $dist;
                        $bestCluster = $j;
                    }
                }

                $newAssignments[$i] = $bestCluster;
            }

            // Check for convergence
            if ($newAssignments === $assignments) {
                break;
            }

            $assignments = $newAssignments;

            // Move each centroid to the mean of its points; a cluster that
            // lost all its points keeps its previous centroid.
            for ($j = 0; $j < $k; $j++) {
                $clusterPoints = [];
                foreach ($assignments as $i => $cluster) {
                    if ($cluster === $j) {
                        $clusterPoints[] = $xCoords[$i];
                    }
                }

                if (!empty($clusterPoints)) {
                    $centroids[$j] = array_sum($clusterPoints) / count($clusterPoints);
                }
            }
        }

        return $assignments;
    }

    /**
     * Split the words of one column into voter cards using vertical gaps.
     *
     * Words are ordered by their top edge. A new card starts whenever the
     * distance between a word's top and the previous word's bottom exceeds
     * 2.5 times the median word height (20 is assumed when no height is known).
     *
     * @param array $columnWords Words with 'top', 'bottom' and 'height' entries.
     * @return array List of cards, each shaped as ['words' => [...]].
     */
    protected function groupVotersByY(array $columnWords): array
    {
        if (empty($columnWords)) {
            return [];
        }

        // Top-to-bottom reading order
        usort($columnWords, static fn ($upper, $lower) => $upper['top'] <=> $lower['top']);

        // Gap limit scales with the typical (median) word height
        $sortedHeights = array_column($columnWords, 'height');
        sort($sortedHeights);
        $typicalHeight = $sortedHeights[intdiv(count($sortedHeights), 2)] ?? 20;
        $gapLimit = $typicalHeight * 2.5;

        $groups = [];
        $lastBottom = null;

        foreach ($columnWords as $word) {
            $opensNewCard = $lastBottom === null || ($word['top'] - $lastBottom) > $gapLimit;

            if ($opensNewCard) {
                $groups[] = ['words' => [$word]];
            } else {
                $groups[array_key_last($groups)]['words'][] = $word;
            }

            $lastBottom = $word['bottom'];
        }

        return $groups;
    }

    /**
     * Build a voter record from the words of a single card.
     *
     * A card whose text contains "DELETED" produces a short stub record.
     * Otherwise name, EPIC ID, age, gender, house number and relation are read
     * through the dedicated extractors; name and EPIC ID each get one fallback
     * attempt. The card is skipped (null) only when neither a name nor an EPIC
     * ID could be found. If just one of the two is missing, a placeholder is
     * substituted and the substitution is logged.
     *
     * @param array       $card         Card data; only $card['words'] is read.
     * @param int         $serialNumber Stored in the record and used to build placeholders.
     * @param string|null $boothNumber  Copied into the record unchanged.
     * @param array       $allWords     Page-wide words handed to the EPIC extractors.
     * @return array|null Voter record, or null for an empty or unreadable card.
     */
    protected function extractVoterFields(array $card, int $serialNumber, ?string $boothNumber, array $allWords = []): ?array
    {
        $cardWords = $card['words'];
        if (empty($cardWords)) {
            return null;
        }

        // Deleted entries are reported with a synthetic ID and no further fields
        $rawText = implode(' ', array_column($cardWords, 'text'));
        $isDeleted = stripos($rawText, 'DELETED') !== false;
        if ($isDeleted) {
            return [
                'serial_no' => $serialNumber,
                'voter_id_number' => 'DELETED_' . $serialNumber,
                'name' => 'DELETED',
                'is_deleted' => true,
                'booth_number' => $boothNumber
            ];
        }

        // Reading order: words whose tops differ by less than 10 count as one
        // line and are ordered left to right; otherwise top to bottom.
        usort($cardWords, function ($first, $second) {
            $sameLine = abs($first['top'] - $second['top']) < 10;

            return $sameLine
                ? $first['left'] <=> $second['left']
                : $first['top'] <=> $second['top'];
        });

        // Card extent; the top edge and horizontal centre feed the EPIC search
        $bounds = [
            'top' => min(array_column($cardWords, 'top')),
            'bottom' => max(array_column($cardWords, 'bottom')),
            'left' => min(array_column($cardWords, 'left')),
            'right' => max(array_column($cardWords, 'right')),
        ];
        $centerX = ($bounds['left'] + $bounds['right']) / 2;

        // Primary extraction
        $primaryName = $this->extractName($cardWords);
        $primaryEpic = $this->extractEpicId($cardWords, $allWords, $bounds['top'], $centerX);
        $age = $this->extractAge($cardWords);
        $gender = $this->extractGender($cardWords);
        $houseNumber = $this->extractHouseNumber($cardWords);
        $relation = $this->extractRelation($cardWords);

        // Second chance for the two identifying fields
        $name = empty($primaryName) ? $this->extractNameFallback($cardWords) : $primaryName;
        $epicId = empty($primaryEpic) ? $this->extractEpicIdFallback($cardWords, $allWords) : $primaryEpic;

        $nameMissing = empty($name);
        $epicMissing = empty($epicId);

        // Nothing identifying at all: give up on this card
        if ($nameMissing && $epicMissing) {
            Log::warning('Skipping voter card - no name or EPIC ID extracted', [
                'serial_number' => $serialNumber,
                'words_count' => count($cardWords),
                'all_card_text' => implode(' ', array_column($cardWords, 'text')),
                'extraction_attempts' => [
                    'original_name' => $primaryName,
                    'fallback_name' => $name,
                    'original_epic' => $primaryEpic,
                    'fallback_epic' => $epicId
                ]
            ]);
            return null;
        }

        // Exactly one of the two is missing from here on: substitute a placeholder
        if ($nameMissing) {
            $name = 'Unknown_' . $serialNumber;
            Log::info('Using fallback name for voter', [
                'serial_number' => $serialNumber,
                'epic_id' => $epicId,
                'fallback_name' => $name
            ]);
        }

        if ($epicMissing) {
            $epicId = 'MISSING_' . str_pad($serialNumber, 7, '0', STR_PAD_LEFT);
            Log::info('Using fallback EPIC ID for voter', [
                'serial_number' => $serialNumber,
                'voter_name' => $name,
                'fallback_epic' => $epicId
            ]);
        }

        // Year of birth is derived from a plausible age; 1901 marks "unknown"
        $hasPlausibleAge = $age && $age >= 18 && $age <= 120;
        $yearOfBirth = $hasPlausibleAge ? (int)date('Y') - $age : 1901;

        return [
            'serial_no' => $serialNumber,
            'voter_id_number' => $epicId,
            'name' => $name,
            'relation_type' => $relation['type'] ?? null,
            'relation_name' => $relation['name'] ?? null,
            'house_number' => $houseNumber,
            'age' => $age,
            'gender' => $this->normalizeGender($gender),
            'year_of_birth' => $yearOfBirth,
            'booth_number' => $boothNumber,
            'is_deleted' => false
        ];
    }

    /**
     * Normalize spaced text: "S C O 0 0 8 9 7 1 4" → "SCO0089714"
     *
     * Only strings made up entirely of single upper-case letters/digits separated
     * by whitespace (at least three characters) are collapsed; anything else is
     * returned unchanged.
     *
     * @param string $text Raw OCR token.
     * @return string The collapsed token, or $text itself when it is not character-spaced.
     */
    protected function normalizeSpacedText(string $text): string
    {
        // The separator may be any run of whitespace (tabs, repeated spaces), and
        // exactly what was matched is what gets removed.
        if (preg_match('/^(?:[A-Z0-9]\s+){2,}[A-Z0-9]$/', $text)) {
            return preg_replace('/\s+/', '', $text);
        }
        return $text;
    }
    
    /**
     * Fallback name extraction with more aggressive patterns.
     *
     * Pass 1 returns the first all-upper-case token longer than three characters.
     * Pass 2 returns (upper-cased) the first token of 4-30 characters that
     * contains a letter and no run of three or more digits.
     * In both passes a token that consists only of card labels ("Name", "Age",
     * "Father's Name", "Photo Available", ...) is never taken as a name.
     *
     * @param array $words Card words; only 'text' is read.
     * @return string|null The guessed name, or null when no token qualifies.
     */
    protected function extractNameFallback(array $words): ?string
    {
        $labels = array_flip([
            'NAME', 'MALE', 'FEMALE', 'FATHER', 'FATHERS', 'HUSBAND', 'HUSBANDS',
            'MOTHER', 'MOTHERS', 'GUARDIAN', 'GUARDIANS', 'PHOTO', 'AVAILABLE',
            'HOUSE', 'NUMBER', 'AGE', 'GENDER', 'SERIAL', 'VOTER', 'DELETED',
            'ELECTION', 'COMMISSION', 'IDENTITY', 'CARD',
        ]);

        // True when every part of the text that contains letters is a known label.
        // Punctuation is ignored, so "Name:" and "Father's" are recognised too.
        $isLabelOnly = static function (string $text) use ($labels): bool {
            foreach (preg_split('/\s+/', trim($text), -1, PREG_SPLIT_NO_EMPTY) as $part) {
                $key = strtoupper(preg_replace('/[^A-Za-z]/', '', $part));
                if ($key !== '' && !isset($labels[$key])) {
                    return false;
                }
            }
            return true;
        };

        // Pass 1: text that already looks like a printed (upper-case) name
        foreach ($words as $word) {
            $text = trim($word['text']);
            if (strlen($text) > 3 &&
                preg_match('/^[A-Z][A-Z\s]{2,}$/', $text) &&
                !$isLabelOnly($text)) {
                return $text;
            }
        }

        // Pass 2: any text of 4-30 characters with letters and no long digit run
        foreach ($words as $word) {
            $text = trim($word['text']);
            if (strlen($text) >= 4 && strlen($text) <= 30 &&
                preg_match('/[A-Za-z]/', $text) &&
                !preg_match('/[0-9]{3,}/', $text) &&
                !$isLabelOnly($text)) {
                return strtoupper($text);
            }
        }

        return null;
    }
    
    /**
     * Fallback EPIC ID extraction with more patterns.
     *
     * Only the strict EPIC shape (3 letters + 7 digits) is accepted here.
     * Tried in order:
     *  1. 3 letters, optional whitespace, 7 digits in the joined card text, where
     *     a letter "O" inside the digit part is read as zero;
     *  2. the same shape in the card text with all spaces removed (IDs split
     *     over several tokens);
     *  3. a word from $allWords that is itself an EPIC and whose centre lies
     *     within 100 units of the centre of any card word.
     *
     * @param array $words    Words of the card.
     * @param array $allWords Page-wide words searched by proximity.
     * @return string|null The EPIC ID, or null when nothing matched.
     */
    protected function extractEpicIdFallback(array $words, array $allWords): ?string
    {
        $allText = implode(' ', array_column($words, 'text'));

        // Pattern 1: the O→0 repair is limited to the digit part. Applied to the
        // whole ID it would turn a prefix such as "SCO" into "SC0" and the ID
        // would then be rejected.
        if (preg_match('/([A-Z]{3})\s*([0-9O]{7})(?![0-9O])/', $allText, $match)) {
            return $match[1] . str_replace('O', '0', $match[2]);
        }

        // Pattern 2: search for the EPIC shape itself rather than testing only
        // the first 10-character run, which is often a name or a label.
        if (preg_match('/[A-Z]{3}[0-9]{7}/', str_replace(' ', '', $allText), $match)) {
            return $match[0];
        }

        // Pattern 3: spatial proximity. Word centres are stored under different
        // keys in different places ('cx'/'cy' vs 'center_x'); accept either and
        // fall back to the bounding box. A word without usable coordinates is
        // skipped instead of being treated as distance 0 from everything.
        $centerOf = static function (array $w): ?array {
            $x = $w['cx'] ?? $w['center_x']
                ?? (isset($w['left'], $w['right']) ? ($w['left'] + $w['right']) / 2 : null);
            $y = $w['cy'] ?? $w['center_y']
                ?? (isset($w['top'], $w['bottom']) ? ($w['top'] + $w['bottom']) / 2 : null);

            return ($x === null || $y === null) ? null : [$x, $y];
        };

        // Keep only page words that are EPIC IDs; this does not depend on the card word
        $epicWords = [];
        foreach ($allWords as $nearbyWord) {
            $text = str_replace(' ', '', $nearbyWord['text'] ?? '');
            $center = $centerOf($nearbyWord);
            if ($center !== null && preg_match('/^[A-Z]{3}[0-9]{7}$/', $text)) {
                $epicWords[] = [$text, $center];
            }
        }

        if (!empty($epicWords)) {
            foreach ($words as $cardWord) {
                $cardCenter = $centerOf($cardWord);
                if ($cardCenter === null) {
                    continue;
                }

                foreach ($epicWords as [$epic, $center]) {
                    $distance = sqrt(
                        pow($cardCenter[0] - $center[0], 2) +
                        pow($cardCenter[1] - $center[1], 2)
                    );

                    if ($distance < 100) { // Within 100 pixels
                        return $epic;
                    }
                }
            }
        }

        return null;
    }
    
    /**
     * Extract voter name from words.
     *
     * Looks for a "Name" label that is not part of a relation label such as
     * "Father's Name". The value is taken from the label token itself
     * ("Name:RAVI") or from the following words on the same line (top within
     * 15 of the label). If no label yields a value, the first all-upper-case
     * token that is not a known card label is used.
     *
     * @param array $words Card words in reading order, with 'text' and 'top'.
     * @return string|null Upper-case name containing only letters and spaces, or null.
     */
    protected function extractName(array $words): ?string
    {
        $labelWords = '/^(Father|Husband|Mother|Guardian|House|Age|Gender|Photo|Available|Number|Name)$/i';

        // Upper-case first, then strip. Stripping first would delete every
        // lower-case letter ("Ravi Kumar" → "R K").
        $clean = static function (string $raw): string {
            return trim(preg_replace('/[^A-Z\s]/', '', strtoupper($raw)));
        };

        foreach ($words as $i => $word) {
            // "Name", "Name:", "Name =" or a label with the value attached ("Name:RAVI")
            if (!preg_match('/^Name\s*(?:[:=\-]\s*(.*))?$/i', $word['text'], $label)) {
                continue;
            }

            // "Father's Name" etc. belongs to the relation, not to the voter.
            // The possessive suffix is allowed so "Father's" and "Fathers" are recognised.
            if (isset($words[$i - 1]) &&
                preg_match('/(Father|Husband|Mother|Guardian)\W*s?\W*$/i', $words[$i - 1]['text'])) {
                continue;
            }

            // Value attached to the label token
            $attached = $clean($label[1] ?? '');
            if (strlen($attached) > 1) {
                return $attached;
            }

            // Look for name on same line (within 15px Y)
            $nameLine = $word['top'];
            for ($j = 1; $j <= 5; $j++) {
                if (!isset($words[$i + $j])) break;
                $nextWord = $words[$i + $j];

                // Must be on same line
                if (abs($nextWord['top'] - $nameLine) > 15) break;

                $text = trim($nextWord['text']);

                // Skip separators
                if (preg_match('/^[:=\-\.\s]+$/', $text)) continue;

                if (!preg_match('/^[A-Z][A-Z\s\.]{1,30}$/i', $text) || preg_match($labelWords, $text)) {
                    continue;
                }

                // Found the first name part; collect up to three further parts
                // on the same line, stopping at the next label.
                $nameCandidate = $text;
                for ($k = $j + 1; $k <= $j + 3; $k++) {
                    if (!isset($words[$i + $k])) break;
                    $following = $words[$i + $k];

                    if (abs($following['top'] - $nameLine) > 15) break;
                    if (!preg_match('/^[A-Z\.]+$/i', $following['text'])) break;
                    if (preg_match($labelWords, $following['text'])) break;

                    $nameCandidate .= ' ' . $following['text'];
                }

                return $clean($nameCandidate);
            }
        }

        // Fallback: look for capitalized words that could be names
        foreach ($words as $word) {
            if (preg_match('/^[A-Z][A-Z\s\.\']{2,25}$/', $word['text']) &&
                !preg_match('/^(MALE|FEMALE|FATHER|HUSBAND|MOTHER|GUARDIAN|PHOTO|AVAILABLE|NAME|HOUSE|NUMBER|AGE|GENDER|SERIAL|VOTER|DELETED|ELECTION|COMMISSION|IDENTITY|CARD)$/i', $word['text'])) {
                return strtoupper(trim($word['text']));
            }
        }

        return null;
    }

    /**
     * Extract the EPIC (voter ID) number for a card.
     *
     * An EPIC is accepted when it is 2-3 letters followed by 7-10 digits.
     * Strategies are tried in order and the first hit wins:
     *  1. any single card word that is an EPIC after OCR repair;
     *  2. an EPIC-like substring of a word in the top-right region of the card;
     *  3. a word from $allWords just above the card and horizontally aligned
     *     with it, nearest to the card first;
     *  4. neighbouring top-right words glued together (ID split into tokens);
     *  5. a value attached to a "Voter Number" style label, and words in which
     *     a currency-like glyph stands in for a letter;
     *  6. a word whose leading digit is a misread "S";
     *  7. a bare 7-10 digit number (old numeric IDs).
     *
     * OCR repair is position-aware: O→0 and I/l→1 are applied only to the digit
     * part. Applied to the whole token they corrupt letter prefixes
     * ("SCO0089714" would become "SC00089714").
     *
     * @param array      $words       Card words with text/top/bottom/left/right.
     * @param array      $allWords    Page-wide words, used only by strategy 3.
     * @param float|null $cardMinY    Top edge of the card; strategy 3 is skipped when null.
     * @param float|null $cardCenterX Horizontal centre of the card; strategy 3 is skipped when null.
     * @return string|null Upper-case EPIC, a digit-only legacy ID, or null when nothing matched.
     */
    protected function extractEpicId(array $words, array $allWords = [], ?float $cardMinY = null, ?float $cardCenterX = null): ?string
    {
        // Strip everything that is not a letter or digit, then try a 3-letter and
        // a 2-letter prefix; look-alike letters are turned into digits in the tail only.
        $repairEpic = static function (string $raw): ?string {
            $text = preg_replace('/[^A-Za-z0-9]/', '', $raw);

            foreach ([3, 2] as $prefixLength) {
                $prefix = strtoupper(substr($text, 0, $prefixLength));
                if (!preg_match('/^[A-Z]{' . $prefixLength . '}$/', $prefix)) {
                    continue;
                }

                $tail = strtr(substr($text, $prefixLength), [
                    'O' => '0', 'o' => '0', 'I' => '1', 'i' => '1', 'l' => '1',
                ]);
                if (preg_match('/^[0-9]{7,10}$/', $tail)) {
                    return $prefix . $tail;
                }
            }

            return null;
        };

        if (!empty($words)) {
            $minY = min(array_column($words, 'top'));
            $maxY = max(array_column($words, 'bottom'));
            $minX = min(array_column($words, 'left'));
            $maxX = max(array_column($words, 'right'));
            $topThreshold = $minY + (($maxY - $minY) * 0.4); // Top 40% of card

            // Strategy 1: every word of the card on its own. Character-spaced IDs
            // ("S C O 0 0 ...") are covered because all whitespace is stripped.
            foreach ($words as $word) {
                $epic = $repairEpic($word['text']);
                if ($epic !== null) {
                    Log::debug('Found EPIC ID in card', [
                        'epic' => $epic,
                        'original' => $word['text'],
                        'position' => ['x' => $word['left'], 'y' => $word['top']]
                    ]);
                    return $epic;
                }
            }

            // Strategy 2: EPIC embedded in a longer token, top 40% / right 60% of the card
            $rightThreshold = $minX + (($maxX - $minX) * 0.4);
            foreach ($words as $word) {
                if ($word['top'] > $topThreshold || $word['left'] < $rightThreshold) {
                    continue;
                }
                if (!preg_match('/([A-Z]{2,3}[A-Z0-9]{7,10})/i', $word['text'], $matches)) {
                    continue;
                }

                $epic = $repairEpic($matches[1]);
                if ($epic !== null) {
                    Log::debug('Found EPIC in top-right area', ['epic' => $epic, 'original' => $word['text']]);
                    return $epic;
                }
            }
        }

        // Strategy 3: search spatially ABOVE the card (up to 600 above, ±250 from centre)
        if (!empty($allWords) && $cardMinY !== null && $cardCenterX !== null) {
            // Word centres are stored under different keys in different places
            $centerXOf = static function (array $w) {
                return $w['center_x'] ?? $w['cx']
                    ?? (isset($w['left'], $w['right']) ? ($w['left'] + $w['right']) / 2 : null);
            };

            $candidateWords = array_values(array_filter($allWords, function ($w) use ($cardMinY, $cardCenterX, $centerXOf) {
                $centerX = $centerXOf($w);
                if ($centerX === null || !isset($w['bottom'])) {
                    return false;
                }

                $isAbove = $w['bottom'] < $cardMinY + 50; // Allow small overlap
                $isWithinRange = $w['bottom'] > $cardMinY - 600;
                $isAligned = abs($centerX - $cardCenterX) < 250;
                return $isAbove && $isWithinRange && $isAligned;
            }));

            Log::debug('EPIC spatial search', [
                'cardMinY' => $cardMinY,
                'cardCenterX' => $cardCenterX,
                'candidates' => array_map(fn($w) => ['text' => $w['text'], 'y' => $w['bottom'], 'x' => $centerXOf($w)], $candidateWords)
            ]);

            // Nearest to the card first: smallest distance between the card top
            // and the word bottom. With the operands swapped, the farthest
            // candidate would be tried first.
            usort($candidateWords, function ($a, $b) use ($cardMinY) {
                return ($cardMinY - $a['bottom']) <=> ($cardMinY - $b['bottom']);
            });

            foreach ($candidateWords as $word) {
                $epic = $repairEpic($word['text']);
                if ($epic !== null) {
                    Log::debug('Found EPIC ID spatially', ['epic' => $epic, 'original' => $word['text']]);
                    return $epic;
                }
            }
        }

        // Strategy 4: glue neighbouring tokens in the top-right area (right-most 200 units)
        if (!empty($words)) {
            $rightEdge = $maxX - 200;

            $topRightWords = [];
            foreach ($words as $word) {
                if ($word['top'] <= $topThreshold && $word['right'] >= $rightEdge) {
                    $topRightWords[] = $word;
                }
            }

            $topRightCount = count($topRightWords);
            foreach ($topRightWords as $i => $word) {
                $parts = [$word['text']];

                // Following words on the same line (within 10 vertically) that
                // start no more than 50 to the right of this word
                for ($j = $i + 1; $j < $topRightCount; $j++) {
                    $next = $topRightWords[$j];
                    if (abs($next['top'] - $word['top']) > 10) break;
                    if ($next['left'] - $word['right'] > 50) break; // Too far apart

                    $parts[] = $next['text'];
                    if (count($parts) >= 10) break; // Max 10 pieces for an EPIC
                }

                $epic = $repairEpic(implode('', $parts));
                if ($epic !== null) {
                    return $epic;
                }
            }
        }

        // Strategy 5: label-attached values and glyph repair.
        // Plain per-word matches were already tried by strategy 1.
        foreach ($words as $word) {
            $text = $word['text'];

            // "Voter Number: ABC1234567" delivered as a single token
            if (preg_match('/(Voter.*Number|Number)/i', $text)) {
                $sameToken = preg_replace('/(Voter|Number|[:=\-\s])+/i', '', $text);
                $sameToken = preg_replace('/[^A-Z0-9]/', '', strtoupper($sameToken));
                if (preg_match('/^[A-Z]{2,3}[0-9]{7,10}$/', $sameToken)) {
                    return $sameToken;
                }
            }

            $noSpace = str_replace(' ', '', $text);

            // Special characters that look like letters ($ → S, ¢ → C, € → C)
            $epic = $repairEpic(str_replace(['$', '¢', '€'], ['S', 'C', 'C'], $noSpace));
            if ($epic !== null) {
                return $epic;
            }

            // Special character prefixes read as "S" ($C, ¢C, etc.)
            $withS = str_replace(['$', '¢', '€', '¥', '§'], 'S', $text);
            if ($withS !== $text) {
                $epic = $repairEpic($withS);
                if ($epic !== null) {
                    return $epic;
                }
            }
        }

        // Strategy 6: leading digit that should be 'S' (common OCR error)
        foreach ($words as $word) {
            $cleaned = preg_replace('/[^A-Z0-9]/', '', strtoupper($word['text']));

            if (preg_match('/^[0-9][A-Z0-9]{9,11}$/', $cleaned)) {
                $epic = $repairEpic('S' . substr($cleaned, 1));
                if ($epic !== null) {
                    return $epic;
                }
            }
        }

        // Strategy 7: last resort - pure numeric IDs (old format)
        foreach ($words as $word) {
            $digitsOnly = preg_replace('/[^0-9]/', '', $word['text']);

            // Accept 7-10 digit numeric strings as valid IDs
            if (preg_match('/^[0-9]{7,10}$/', $digitsOnly)) {
                return $digitsOnly;
            }
        }

        return null;
    }

    /**
     * Extract age from words.
     *
     * Finds an "Age" label and reads the number attached to it ("Age:45") or
     * carried by the next up-to-four words. Digits split over several tokens
     * ("6", "1") are joined. Only ages from 18 to 120 are accepted.
     *
     * @param array $words Card words in reading order; only 'text' is read.
     * @return int|null The age, or null when no plausible value follows an Age label.
     */
    protected function extractAge(array $words): ?int
    {
        $isPlausible = static function (int $age): bool {
            return $age >= 18 && $age <= 120;
        };

        foreach ($words as $i => $word) {
            // The label may stand alone ("Age", "Age:") or carry the value ("Age:45", "Age 45")
            if (!preg_match('/^Age\s*[:=+\-]*\s*(\d{1,3})?$/i', trim($word['text']), $label)) {
                continue;
            }

            if (isset($label[1]) && $isPlausible((int) $label[1])) {
                return (int) $label[1];
            }

            // Check next words, handle split digits like "6 1" → 61
            $digits = '';
            for ($j = 1; $j <= 4; $j++) {
                if (!isset($words[$i + $j])) break;

                // Drop separators glued to the value (":45", "45,")
                $token = trim($words[$i + $j]['text'], " \t\n\r:=-.+,");

                // Pure separator token
                if ($token === '') continue;

                // Up to three digits in total, so ages of 100 and above can be read
                if (preg_match('/^\d{1,3}$/', $token) && strlen($digits . $token) <= 3) {
                    $digits .= $token;

                    if (strlen($digits) >= 2 && $isPlausible((int) $digits)) {
                        return (int) $digits;
                    }
                } elseif ($digits !== '') {
                    // Something else after the digits started: stop collecting
                    break;
                }
            }

            if ($digits !== '' && $isPlausible((int) $digits)) {
                return (int) $digits;
            }
        }

        return null;
    }

    /**
     * Extract gender from words.
     *
     * Precedence, highest first:
     *  1. the value attached to or following (within two words) a "Gender" label;
     *  2. a stand-alone "male" / "female" token anywhere in the card;
     *  3. a stand-alone "m" / "f" token anywhere in the card.
     * The label is checked first so that a stray single letter, such as an
     * initial in the voter's name, cannot override the printed gender.
     *
     * @param array $words Card words in reading order; only 'text' is read.
     * @return string|null One of 'male', 'female', 'm', 'f' (lower-case), or null.
     */
    protected function extractGender(array $words): ?string
    {
        $valid = ['male', 'female', 'm', 'f'];

        // Lower-case and drop separators glued to the value (":Male", "Male.")
        $clean = static function (string $text): string {
            return strtolower(trim($text, " \t\n\r:=-.,/"));
        };

        // 1. Labelled value
        foreach ($words as $i => $word) {
            if (stripos($word['text'], 'Gender') === false) {
                continue;
            }

            $attached = $clean(preg_replace('/Gender\s*[:=]?\s*/i', '', $word['text']));
            if (in_array($attached, $valid, true)) {
                return $attached;
            }

            // Check next words
            for ($j = 1; $j <= 2; $j++) {
                if (!isset($words[$i + $j])) break;

                $next = $clean($words[$i + $j]['text']);
                if ($next === '') continue; // pure separator

                if (in_array($next, $valid, true)) {
                    return $next;
                }
            }
        }

        // 2. Unlabelled full word
        foreach ($words as $word) {
            $text = $clean($word['text']);
            if ($text === 'male' || $text === 'female') {
                return $text;
            }
        }

        // 3. Unlabelled single letter (weakest evidence)
        foreach ($words as $word) {
            $text = strtolower($word['text']);
            if ($text === 'm' || $text === 'f') {
                return $text;
            }
        }

        return null;
    }

    /**
     * Extract house number from words.
     *
     * Recognises the label both as one token ("House Number : 45") and split
     * over several tokens ("House", "Number", ":", "45"). The value is the rest
     * of the label token or the first of the next three words that looks like
     * a house number: at most 10 characters drawn from upper-case letters,
     * digits and - / , . with at least one letter or digit.
     *
     * @param array $words Card words in reading order; only 'text' is read.
     * @return string|null The house number, or null when none is found.
     */
    protected function extractHouseNumber(array $words): ?string
    {
        // At least one alphanumeric character, so a lone "." or "-" is not a value
        $valuePattern = '/^(?=.*[A-Z0-9])[A-Z0-9\-\/,\.\s]{1,10}$/';

        foreach ($words as $i => $word) {
            $text = $word['text'];

            // "House ... Number" anywhere in the token, or a token that starts
            // with the word "House" followed by a separator, "No"/"Number", or nothing.
            // A name that merely contains "house" is not a label.
            $isLabel = preg_match('/House.*Number/i', $text)
                || preg_match('/^House(?=$|[\s:=\-.]|N(?:umber|o))/i', $text);
            if (!$isLabel) {
                continue;
            }

            // Value in the same token. "No\.?" consumes the dot of "No." so it
            // does not end up in the value.
            $attached = preg_replace('/House\s*(?:Number|No\.?)?\s*[:=]?\s*/i', '', $text);
            $attached = trim(str_replace('Photo', '', $attached));
            if ($attached !== '' && preg_match($valuePattern, $attached)) {
                return $attached;
            }

            // Check next words
            for ($j = 1; $j <= 3; $j++) {
                if (!isset($words[$i + $j])) break;
                $next = trim(str_replace('Photo', '', $words[$i + $j]['text']));

                // Skip "Number", "No", ":", etc.
                if (preg_match('/^(Number|No\.?|[:=\-\.]+)$/i', $next)) continue;

                if ($next !== '' && preg_match($valuePattern, $next)) {
                    return $next;
                }
            }
        }

        return null;
    }

    /**
     * Read the relation (father / husband / mother / guardian) and the
     * relative's name from the card words.
     *
     * Only the first word containing a relation keyword is considered. The name
     * is whatever remains in that word once the label is removed; if nothing
     * remains, the next three words are scanned for an upper-case name, and one
     * further upper-case word on the same line (top within 20) is appended.
     *
     * @param array $words Card words in reading order, with 'text' and 'top'.
     * @return array ['type' => string|null, 'name' => string|null]
     */
    protected function extractRelation(array $words): array
    {
        $result = ['type' => null, 'name' => null];

        foreach ($words as $position => $word) {
            if (!preg_match('/(Father|Husband|Mother|Guardian)\'?s?\s*(Name)?/i', $word['text'], $hit)) {
                continue;
            }

            $result['type'] = strtolower(preg_replace('/\'?s?$/', '', $hit[1]));

            // Name printed in the same token as the label
            $inline = trim(preg_replace(
                '/(Father|Husband|Mother|Guardian)\'?s?\s*(Name)?\s*[:=]?\s*/i',
                '',
                $word['text']
            ));
            if (strlen($inline) > 1) {
                $result['name'] = strtoupper($inline);
                return $result;
            }

            // Otherwise scan ahead, at most three words
            $offset = 0;
            while (++$offset <= 3 && isset($words[$position + $offset])) {
                $candidate = $words[$position + $offset];
                $candidateText = trim($candidate['text']);

                // "Name", ":" and similar are part of the label
                if (preg_match('/^(Name|[:=\-\.]+)$/i', $candidateText)) {
                    continue;
                }
                if (!preg_match('/^[A-Z][A-Z\s\.\']{1,25}$/', $candidateText)) {
                    continue;
                }

                $result['name'] = strtoupper($candidateText);

                // Second name part on the same line
                $after = $words[$position + $offset + 1] ?? null;
                if ($after !== null &&
                    abs($after['top'] - $candidate['top']) < 20 &&
                    preg_match('/^[A-Z\.]+$/', $after['text'])) {
                    $result['name'] .= ' ' . $after['text'];
                }
                break;
            }

            return $result;
        }

        return $result;
    }

    /**
     * Fallback to simple line-based text parsing when TSV is not available.
     *
     * Each line of $text is searched for a "Name:" / "Name=" label followed by
     * a name of 2-25 letters, spaces, dots or apostrophes. Every accepted line
     * yields one voter record in which only the name, serial number and booth
     * number are real; the remaining fields are placeholders (gender 'other',
     * year_of_birth 1901, and a generated 'SCO…' voter id).
     *
     * Lines where "Name" belongs to a relation label ("Father's Name", "Husband
     * Name", …) are ignored so relation names are not reported as voters, and
     * matches that are blank after trimming are skipped.
     *
     * @param string      $text        Plain OCR text, one field per line.
     * @param string|null $boothNumber Booth number copied into every record.
     * @return array List of voter arrays, with serial numbers starting at 1.
     */
    protected function fallbackTextParsing(string $text, ?string $boothNumber): array
    {
        Log::info('Using fallback text parsing');

        // Simple regex-based extraction as fallback
        $voters = [];
        $serialCounter = 1;

        foreach (explode("\n", $text) as $line) {
            if (!preg_match('/Name\s*[:=]\s*([A-Z\s\.\']{2,25})/i', $line, $nameMatch, PREG_OFFSET_CAPTURE)) {
                continue;
            }

            // The pattern is unanchored, so it also hits "Father's Name: …".
            // Inspect the text right before the matched "Name" and skip the line
            // when it ends in one of the relation labels.
            $beforeLabel = substr($line, 0, $nameMatch[0][1]);
            if (preg_match('/(Father|Husband|Mother|Guardian)\'?s?\s*$/i', $beforeLabel)) {
                continue;
            }

            // The character class admits whitespace, so the capture can be blank.
            $name = trim($nameMatch[1][0]);
            if ($name === '') {
                continue;
            }

            // Generate placeholder EPIC ID: serial * 1000 plus a random 3-digit
            // suffix, left-padded with zeros to 7 digits.
            $epicId = 'SCO' . str_pad($serialCounter * 1000 + rand(100, 999), 7, '0', STR_PAD_LEFT);

            $voters[] = [
                'serial_no' => $serialCounter,
                'voter_id_number' => $epicId,
                'name' => strtoupper($name),
                'relation_type' => null,
                'relation_name' => null,
                'house_number' => null,
                'age' => null,
                'gender' => 'other',
                'year_of_birth' => 1901,
                'booth_number' => $boothNumber,
                'is_deleted' => false
            ];

            $serialCounter++;
        }

        return $voters;
    }

    /**
     * Map a raw gender token to one of 'male', 'female' or 'other'.
     *
     * Comparison is done on the trimmed, lower-cased value. Anything that is
     * not a recognised male/female token (including null, '', 'third', 'other'
     * and 'o') resolves to 'other'.
     */
    private function normalizeGender(?string $gender): string
    {
        $token = strtolower(trim((string) $gender));

        if (in_array($token, ['male', 'm'], true)) {
            return 'male';
        }

        // 'fale' is a known OCR misread of 'female'.
        if (in_array($token, ['female', 'f', 'fale'], true)) {
            return 'female';
        }

        return 'other';
    }

    /**
     * Generate a multi-row SQL INSERT statement for debugging.
     *
     * The statement is assembled as text with addslashes() escaping; it is
     * meant for inspection/logging, not as a substitute for parameterised
     * queries.
     *
     * Optional text columns (relation_type, relation_name, house_number,
     * booth_number) are emitted as NULL only when the value is null or an empty
     * string, so a literal "0" is preserved. Every text column, including
     * booth_number, is escaped. A non-numeric age is emitted as NULL.
     *
     * @param array $voters List of voter arrays using the keys serial_no,
     *                      voter_id_number, name, relation_type, relation_name,
     *                      house_number, age, gender, year_of_birth,
     *                      booth_number and is_deleted.
     * @return string The INSERT statement, or '' when $voters is empty.
     */
    protected function generateSQL(array $voters): string
    {
        if (empty($voters)) {
            return '';
        }

        $sql = "INSERT INTO voters (serial_number, voter_id_number, name, relation_type, relation_name, house_number, age, gender, year_of_birth, booth_number, is_deleted) VALUES\n";

        // Quote an optional text value. The explicit null/'' comparison (rather
        // than a truthiness test) keeps values such as "0" from turning into NULL.
        $quoteOrNull = static fn ($value): string => ($value === null || $value === '')
            ? 'NULL'
            : "'" . addslashes((string) $value) . "'";

        $values = [];

        foreach ($voters as $voter) {
            $age = $voter['age'] ?? null;

            $values[] = sprintf(
                "(%d, '%s', '%s', %s, %s, %s, %s, '%s', %d, %s, %d)",
                $voter['serial_no'],
                addslashes($voter['voter_id_number']),
                addslashes($voter['name']),
                $quoteOrNull($voter['relation_type'] ?? null),
                $quoteOrNull($voter['relation_name'] ?? null),
                $quoteOrNull($voter['house_number'] ?? null),
                // Age goes into the statement unquoted, so only numeric input is let through.
                is_numeric($age) ? (string) (int) $age : 'NULL',
                addslashes((string) $voter['gender']),
                $voter['year_of_birth'],
                $quoteOrNull($voter['booth_number'] ?? null),
                $voter['is_deleted'] ? 1 : 0
            );
        }

        return $sql . implode(",\n", $values) . ";";
    }

    /**
     * Legacy hook kept for interface compatibility; the new parser does not use it.
     *
     * The new parser already produces normalized voter data, so the input is
     * handed back without modification.
     */
    protected function buildVoterArray(array $data): array
    {
        $normalized = $data;

        return $normalized;
    }
}