<?php
namespace App\Services;

use Aws\Textract\TextractClient;
use Aws\Exception\AwsException;
use InvalidArgumentException;
use RuntimeException;

/**
 * Thin wrapper around AWS Textract that OCRs a document and applies simple
 * heuristics to pull a person's name and a US-style postal address out of
 * the recognised text.
 */
class OcrService
{
    protected TextractClient $client;

    /**
     * Builds the Textract client.
     *
     * The region is resolved in this order: $config['region'], the AWS_REGION
     * environment variable, the AWS_DEFAULT_REGION environment variable, and
     * finally 'us-east-1'. Every other key in $config is passed through to
     * the client unchanged and overrides the defaults set here.
     *
     * @param array $config Options forwarded to the TextractClient constructor.
     */
    public function __construct(array $config = [])
    {
        // getenv() returns false (not null) for an unset variable, so the
        // null-coalescing operator would never reach the later fallbacks.
        // `?:` skips both false and empty strings.
        $region = $config['region']
            ?? (getenv('AWS_REGION') ?: (getenv('AWS_DEFAULT_REGION') ?: 'us-east-1'));

        // The resolved region is merged last so that an explicit
        // 'region' => null in $config cannot clobber the fallback.
        $clientConfig = array_merge(
            ['version' => '2018-06-27'],
            $config,
            ['region' => $region]
        );

        $this->client = new TextractClient($clientConfig);
    }

    /**
     * Extracts basic fields (full name, address, city, state, zip) from a document.
     *
     * The 'country' key is always an empty string: no country detection is
     * implemented. 'raw' holds the complete OCR text, one detected line per row.
     *
     * @param string $filePath Path to the document to analyse.
     *
     * @return array Map with the keys name, address, city, state, zip, country and raw
     *               (all strings; fields that could not be detected are '').
     *
     * @throws InvalidArgumentException If $filePath is not an existing regular file.
     * @throws RuntimeException         If the file cannot be read or Textract fails.
     */
    public function extractFields(string $filePath): array
    {
        if (!is_file($filePath)) {
            throw new InvalidArgumentException('File not found for OCR');
        }

        $text = $this->extractText($filePath);

        // preg_split() returns false on a PCRE failure; treat that as "no lines".
        $rawLines = preg_split('/\r\n|\r|\n/', $text) ?: [];

        // Compare against '' explicitly: a bare array_filter() would also
        // discard a line whose whole content is "0".
        $lines = array_values(array_filter(
            array_map('trim', $rawLines),
            static fn (string $line): bool => $line !== ''
        ));

        $name = $this->pickLikelyName($lines);
        [$address, $city, $state, $zip] = $this->pickLikelyAddress($lines);

        return [
            'name'    => $name,
            'address' => $address,
            'city'    => $city,
            'state'   => $state,
            'zip'     => $zip,
            'country' => '',
            'raw'     => $text,
        ];
    }

    /**
     * Returns the full OCR text of the document as plain text.
     *
     * Only blocks of type LINE are used; they are joined with "\n" in the
     * order in which Textract returned them.
     *
     * @param string $filePath Path to the document to send to Textract.
     *
     * @return string Newline-separated detected lines ('' when nothing was detected).
     *
     * @throws RuntimeException If the file cannot be read or the Textract call fails.
     */
    protected function extractText(string $filePath): string
    {
        $bytes = file_get_contents($filePath);
        if ($bytes === false) {
            // Without this guard a boolean false would be sent as the document bytes.
            throw new RuntimeException('Unable to read file for OCR');
        }

        try {
            $result = $this->client->detectDocumentText([
                'Document' => [
                    'Bytes' => $bytes,
                ],
            ]);
        } catch (AwsException $e) {
            // getAwsErrorMessage() may be null (e.g. for transport-level errors);
            // fall back to the generic exception message in that case.
            $detail = $e->getAwsErrorMessage() ?? $e->getMessage();
            throw new RuntimeException('Textract error: ' . $detail, 0, $e);
        }

        $blocks = $result->get('Blocks') ?? [];
        $lines = [];
        foreach ($blocks as $block) {
            if (($block['BlockType'] ?? '') !== 'LINE') {
                continue;
            }

            // Explicit comparison instead of empty(): the text "0" is a valid line.
            $lineText = (string) ($block['Text'] ?? '');
            if ($lineText !== '') {
                $lines[] = $lineText;
            }
        }

        return implode("\n", $lines);
    }

    /**
     * Picks the first line that looks like a person's name.
     *
     * Heuristic: the line consists solely of letters (any script or case,
     * including accented characters), hyphens, dots, apostrophes and
     * whitespace, starts with a letter, and contains between 2 and 4 words.
     *
     * @param array $lines Trimmed, non-empty OCR lines.
     *
     * @return string The matching line, or '' when no line qualifies.
     */
    protected function pickLikelyName(array $lines): string
    {
        foreach ($lines as $line) {
            // \p{L} covers lower- and upper-case accented letters; the character
            // class contains no digits, so no separate digit check is needed.
            if (!preg_match('/^\p{L}[\p{L}\-.\'\s]{3,}$/u', $line)) {
                continue;
            }

            $words = preg_split('/\s+/', $line, -1, PREG_SPLIT_NO_EMPTY) ?: [];
            $wordCount = count($words);
            if ($wordCount >= 2 && $wordCount <= 4) {
                return $line;
            }
        }

        return '';
    }

    /**
     * Picks a likely street address and a US-style "City, ST 12345" line.
     *
     * The two searches are independent: the street line is the first line
     * starting with a number followed by whitespace and more text; the
     * city/state/zip line is the first line matching "<city>, <2 capital
     * letters> <5-digit zip, optionally +4>".
     *
     * @param array $lines Trimmed, non-empty OCR lines.
     *
     * @return array List of four strings: [address, city, state, zip];
     *               each is '' when not found.
     */
    protected function pickLikelyAddress(array $lines): array
    {
        $address = $city = $state = $zip = '';

        // Look for a line of the form "<number> <street ...>".
        foreach ($lines as $line) {
            if (preg_match('/^\d+\s+.+/u', $line)) {
                $address = $line;
                break;
            }
        }

        // Look for city/state/zip (US format). The trailing \b prevents
        // matching the first five digits of a longer digit run.
        foreach ($lines as $line) {
            if (preg_match('/^(.+),\s*([A-Z]{2})\s+(\d{5}(?:-\d{4})?)\b/', $line, $m)) {
                $city = trim($m[1]);
                $state = $m[2];
                $zip = $m[3];
                break;
            }
        }

        return [$address, $city, $state, $zip];
    }
}
