<?php

require_once __DIR__ . '/vendor/autoload.php';

use thiagoalessio\TesseractOCR\TesseractOCR;

// Manual diagnostic: run full-page OCR on "voters 46.png", save the raw text
// next to this script, then list the voter IDs and names found in that text.
$imagePath = __DIR__ . '/Output/voters 46.png';
$outputPath = __DIR__ . '/full_page_ocr_output.txt';

echo "=== Testing Full Page OCR on voters 46.png ===\n\n";

if (!file_exists($imagePath)) {
    echo "File not found: $imagePath\n";
    exit(1);
}

// The OCR wrapper shells out to the tesseract binary and throws when the
// binary is missing or the run fails; report that cleanly instead of letting
// the script die with an uncaught-exception stack trace.
try {
    $ocr = new TesseractOCR($imagePath);
    $ocr->lang('eng')->psm(3); // Fully automatic page segmentation
    $text = $ocr->run();
} catch (\Exception $e) {
    echo "OCR failed: " . $e->getMessage() . "\n";
    exit(1);
}

// Save the full OCR text for analysis. Only claim success when the write
// actually succeeded; a failed write does not stop the on-screen analysis.
if (file_put_contents($outputPath, $text) === false) {
    echo "Failed to save OCR text to: full_page_ocr_output.txt\n\n";
} else {
    echo "Full OCR text saved to: full_page_ocr_output.txt\n\n";
}

// Look for all voter IDs in the text: three letters followed by seven digits.
// The second alternative also accepts a "0" as the third character
// (presumably to tolerate OCR reading the letter O as zero - confirm).
preg_match_all('/\b([A-Z]{3}[0-9]{7}|[A-Z]{2}[A-Z0][0-9]{7})\b/i', $text, $matches);

// The pattern is case-insensitive, so normalise to upper case BEFORE
// de-duplicating; otherwise "abc1234567" and "ABC1234567" count as two IDs.
$voterIds = array_unique(array_map('strtoupper', $matches[1] ?? []));

echo "Total voter IDs found: " . count($voterIds) . "\n";
echo "Voter IDs:\n";
foreach ($voterIds as $id) {
    echo "  - " . $id . "\n";
}

// Look for all names (pattern: Name : XXXXX). The capture is lazy and stops
// at the next field label or at end of line. The \b in front of the label
// alternatives keeps a label word embedded in a name (e.g. the "AGE" inside
// "MANAGER") from cutting the name short. Note that any label ending in
// "Name :" matches, not only the voter's own name field.
preg_match_all(
    '/Name\s*:\s*([A-Z][A-Z\s]+?)(?=\s*(?:\b(?:Father|Husband|Mother|House|Age)|$))/im',
    $text,
    $nameMatches
);
$names = array_slice($nameMatches[1] ?? [], 0, 30); // First 30 names

echo "\nFirst 30 names found:\n";
foreach ($names as $idx => $name) {
    printf("%2d. %s\n", $idx + 1, trim($name));
}